1bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor/* 21d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Copyright (C) 2008 The Guava Authors 3bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 4bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * Licensed under the Apache License, Version 2.0 (the "License"); 5bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * you may not use this file except in compliance with the License. 6bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * You may obtain a copy of the License at 7bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 8bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * http://www.apache.org/licenses/LICENSE-2.0 9bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 10bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * Unless required by applicable law or agreed to in writing, software 11bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * distributed under the License is distributed on an "AS IS" BASIS, 12bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * See the License for the specific language governing permissions and 14bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * limitations under the License. 
15bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 16bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 17bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorpackage com.google.common.base; 18bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 19bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport static com.google.common.base.Preconditions.checkArgument; 20bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport static com.google.common.base.Preconditions.checkNotNull; 21bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport com.google.common.annotations.Beta; 231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport com.google.common.annotations.GwtCompatible; 247dd252788645e940eada959bdde927426e2531c9Paul Duffinimport com.google.common.annotations.GwtIncompatible; 251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 26bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorimport java.util.Arrays; 277dd252788645e940eada959bdde927426e2531c9Paul Duffinimport java.util.BitSet; 28bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertimport javax.annotation.CheckReturnValue; 301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 31bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor/** 321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does 331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * for any {@link Object}. Also offers basic text processing methods based on this function. 341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Implementations are strongly encouraged to be side-effect-free and immutable. 
351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * 361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean 371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * "any character {@code c} for which {@code this.matches(c)} returns {@code true}". 38bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand 401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical 411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher} 421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * treats these just as two separate characters. 43bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>Example usages: <pre> 451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * String trimmed = {@link #WHITESPACE WHITESPACE}.{@link #trimFrom trimFrom}(userInput); 461d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * if ({@link #ASCII ASCII}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre> 47bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 487dd252788645e940eada959bdde927426e2531c9Paul Duffin * <p>See the Guava User Guide article on <a href= 497dd252788645e940eada959bdde927426e2531c9Paul Duffin * "http://code.google.com/p/guava-libraries/wiki/StringsExplained#CharMatcher"> 507dd252788645e940eada959bdde927426e2531c9Paul Duffin * {@code CharMatcher}</a>. 
517dd252788645e940eada959bdde927426e2531c9Paul Duffin * 52bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @author Kevin Bourrillion 531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @since 1.0 54bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 550888a09821a98ac0680fad765217302858e70fa4Paul Duffin@Beta // Possibly change from chars to code points; decide constants vs. methods 567dd252788645e940eada959bdde927426e2531c9Paul Duffin@GwtCompatible(emulated = true) 57bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnorpublic abstract class CharMatcher implements Predicate<Character> { 580888a09821a98ac0680fad765217302858e70fa4Paul Duffin 59bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor // Constants 60bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Determines whether a character is a breaking whitespace (that is, a whitespace which can be 621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a 631d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * discussion of that term. 
64bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @since 2.0 66bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 677dd252788645e940eada959bdde927426e2531c9Paul Duffin public static final CharMatcher BREAKING_WHITESPACE = new CharMatcher() { 687dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 697dd252788645e940eada959bdde927426e2531c9Paul Duffin public boolean matches(char c) { 707dd252788645e940eada959bdde927426e2531c9Paul Duffin switch (c) { 717dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\t': 727dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\n': 737dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\013': 747dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\f': 757dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\r': 767dd252788645e940eada959bdde927426e2531c9Paul Duffin case ' ': 777dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\u0085': 787dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\u1680': 797dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\u2028': 807dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\u2029': 817dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\u205f': 827dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\u3000': 837dd252788645e940eada959bdde927426e2531c9Paul Duffin return true; 847dd252788645e940eada959bdde927426e2531c9Paul Duffin case '\u2007': 857dd252788645e940eada959bdde927426e2531c9Paul Duffin return false; 867dd252788645e940eada959bdde927426e2531c9Paul Duffin default: 877dd252788645e940eada959bdde927426e2531c9Paul Duffin return c >= '\u2000' && c <= '\u200a'; 887dd252788645e940eada959bdde927426e2531c9Paul Duffin } 897dd252788645e940eada959bdde927426e2531c9Paul Duffin } 907dd252788645e940eada959bdde927426e2531c9Paul Duffin 917dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 927dd252788645e940eada959bdde927426e2531c9Paul Duffin public String toString() 
{ 937dd252788645e940eada959bdde927426e2531c9Paul Duffin return "CharMatcher.BREAKING_WHITESPACE"; 947dd252788645e940eada959bdde927426e2531c9Paul Duffin } 957dd252788645e940eada959bdde927426e2531c9Paul Duffin }; 96bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 97bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 981d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Determines whether a character is ASCII, meaning that its code point is less than 128. 99bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 1007dd252788645e940eada959bdde927426e2531c9Paul Duffin public static final CharMatcher ASCII = inRange('\0', '\u007f', "CharMatcher.ASCII"); 1017dd252788645e940eada959bdde927426e2531c9Paul Duffin 1027dd252788645e940eada959bdde927426e2531c9Paul Duffin private static class RangesMatcher extends CharMatcher { 1037dd252788645e940eada959bdde927426e2531c9Paul Duffin private final char[] rangeStarts; 1047dd252788645e940eada959bdde927426e2531c9Paul Duffin private final char[] rangeEnds; 1057dd252788645e940eada959bdde927426e2531c9Paul Duffin 1067dd252788645e940eada959bdde927426e2531c9Paul Duffin RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) { 1077dd252788645e940eada959bdde927426e2531c9Paul Duffin super(description); 1087dd252788645e940eada959bdde927426e2531c9Paul Duffin this.rangeStarts = rangeStarts; 1097dd252788645e940eada959bdde927426e2531c9Paul Duffin this.rangeEnds = rangeEnds; 1107dd252788645e940eada959bdde927426e2531c9Paul Duffin checkArgument(rangeStarts.length == rangeEnds.length); 1117dd252788645e940eada959bdde927426e2531c9Paul Duffin for (int i = 0; i < rangeStarts.length; i++) { 1127dd252788645e940eada959bdde927426e2531c9Paul Duffin checkArgument(rangeStarts[i] <= rangeEnds[i]); 1137dd252788645e940eada959bdde927426e2531c9Paul Duffin if (i + 1 < rangeStarts.length) { 1147dd252788645e940eada959bdde927426e2531c9Paul Duffin checkArgument(rangeEnds[i] < rangeStarts[i + 1]); 1157dd252788645e940eada959bdde927426e2531c9Paul Duffin } 
1167dd252788645e940eada959bdde927426e2531c9Paul Duffin } 1177dd252788645e940eada959bdde927426e2531c9Paul Duffin } 1187dd252788645e940eada959bdde927426e2531c9Paul Duffin 1197dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 1207dd252788645e940eada959bdde927426e2531c9Paul Duffin public boolean matches(char c) { 1217dd252788645e940eada959bdde927426e2531c9Paul Duffin int index = Arrays.binarySearch(rangeStarts, c); 1227dd252788645e940eada959bdde927426e2531c9Paul Duffin if (index >= 0) { 1237dd252788645e940eada959bdde927426e2531c9Paul Duffin return true; 1247dd252788645e940eada959bdde927426e2531c9Paul Duffin } else { 1257dd252788645e940eada959bdde927426e2531c9Paul Duffin index = ~index - 1; 1267dd252788645e940eada959bdde927426e2531c9Paul Duffin return index >= 0 && c <= rangeEnds[index]; 1277dd252788645e940eada959bdde927426e2531c9Paul Duffin } 1287dd252788645e940eada959bdde927426e2531c9Paul Duffin } 1297dd252788645e940eada959bdde927426e2531c9Paul Duffin } 1307dd252788645e940eada959bdde927426e2531c9Paul Duffin 1317dd252788645e940eada959bdde927426e2531c9Paul Duffin // Must be in ascending order. 
1327dd252788645e940eada959bdde927426e2531c9Paul Duffin private static final String ZEROES = "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6" 1337dd252788645e940eada959bdde927426e2531c9Paul Duffin + "\u0c66\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1b50\u1bb0" 1347dd252788645e940eada959bdde927426e2531c9Paul Duffin + "\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10"; 1357dd252788645e940eada959bdde927426e2531c9Paul Duffin 1367dd252788645e940eada959bdde927426e2531c9Paul Duffin private static final String NINES; 1377dd252788645e940eada959bdde927426e2531c9Paul Duffin static { 1387dd252788645e940eada959bdde927426e2531c9Paul Duffin StringBuilder builder = new StringBuilder(ZEROES.length()); 1397dd252788645e940eada959bdde927426e2531c9Paul Duffin for (int i = 0; i < ZEROES.length(); i++) { 1407dd252788645e940eada959bdde927426e2531c9Paul Duffin builder.append((char) (ZEROES.charAt(i) + 9)); 1417dd252788645e940eada959bdde927426e2531c9Paul Duffin } 1427dd252788645e940eada959bdde927426e2531c9Paul Duffin NINES = builder.toString(); 1437dd252788645e940eada959bdde927426e2531c9Paul Duffin } 144bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 145bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 146bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * Determines whether a character is a digit according to 147bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. 1480888a09821a98ac0680fad765217302858e70fa4Paul Duffin * If you only care to match ASCII digits, you can use {@code inRange('0', '9')}. 
149bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 1500888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static final CharMatcher DIGIT = new RangesMatcher( 1510888a09821a98ac0680fad765217302858e70fa4Paul Duffin "CharMatcher.DIGIT", ZEROES.toCharArray(), NINES.toCharArray()); 152bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 153bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 1540888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Determines whether a character is a digit according to {@linkplain Character#isDigit(char) 1550888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Java's definition}. If you only care to match ASCII digits, you can use {@code 1560888a09821a98ac0680fad765217302858e70fa4Paul Duffin * inRange('0', '9')}. 157bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 1587dd252788645e940eada959bdde927426e2531c9Paul Duffin public static final CharMatcher JAVA_DIGIT = new CharMatcher("CharMatcher.JAVA_DIGIT") { 1590888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 160bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return Character.isDigit(c); 161bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 162bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor }; 163bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 164bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 1650888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Determines whether a character is a letter according to {@linkplain Character#isLetter(char) 1660888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Java's definition}. If you only care to match letters of the Latin alphabet, you can use {@code 167bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * inRange('a', 'z').or(inRange('A', 'Z'))}. 
168bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 1697dd252788645e940eada959bdde927426e2531c9Paul Duffin public static final CharMatcher JAVA_LETTER = new CharMatcher("CharMatcher.JAVA_LETTER") { 1700888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 171bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return Character.isLetter(c); 172bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 173bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor }; 174bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 175bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 1760888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Determines whether a character is a letter or digit according to {@linkplain 177bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * Character#isLetterOrDigit(char) Java's definition}. 178bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 1790888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static final CharMatcher JAVA_LETTER_OR_DIGIT = 1800888a09821a98ac0680fad765217302858e70fa4Paul Duffin new CharMatcher("CharMatcher.JAVA_LETTER_OR_DIGIT") { 1810888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 182bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return Character.isLetterOrDigit(c); 183bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 184bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor }; 185bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 186bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 1870888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Determines whether a character is upper case according to {@linkplain 1880888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Character#isUpperCase(char) Java's definition}. 
189bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 1900888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static final CharMatcher JAVA_UPPER_CASE = 1910888a09821a98ac0680fad765217302858e70fa4Paul Duffin new CharMatcher("CharMatcher.JAVA_UPPER_CASE") { 1920888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 193bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return Character.isUpperCase(c); 194bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 195bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor }; 196bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 197bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 1980888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Determines whether a character is lower case according to {@linkplain 1990888a09821a98ac0680fad765217302858e70fa4Paul Duffin * Character#isLowerCase(char) Java's definition}. 200bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 2010888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static final CharMatcher JAVA_LOWER_CASE = 2020888a09821a98ac0680fad765217302858e70fa4Paul Duffin new CharMatcher("CharMatcher.JAVA_LOWER_CASE") { 2030888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 204bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return Character.isLowerCase(c); 205bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 206bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor }; 207bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 208bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 2091d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Determines whether a character is an ISO control character as specified by {@link 2101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Character#isISOControl(char)}. 
211bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 2120888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static final CharMatcher JAVA_ISO_CONTROL = 2130888a09821a98ac0680fad765217302858e70fa4Paul Duffin inRange('\u0000', '\u001f') 2140888a09821a98ac0680fad765217302858e70fa4Paul Duffin .or(inRange('\u007f', '\u009f')) 2150888a09821a98ac0680fad765217302858e70fa4Paul Duffin .withToString("CharMatcher.JAVA_ISO_CONTROL"); 216bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 217bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 2181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Determines whether a character is invisible; that is, if its Unicode category is any of 2191d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and 2201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * PRIVATE_USE according to ICU4J. 221bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 2220888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static final CharMatcher INVISIBLE = new RangesMatcher("CharMatcher.INVISIBLE", ( 2230888a09821a98ac0680fad765217302858e70fa4Paul Duffin "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u1680\u180e\u2000\u2028\u205f\u2066\u2067\u2068" 2240888a09821a98ac0680fad765217302858e70fa4Paul Duffin + "\u2069\u206a\u3000\ud800\ufeff\ufff9\ufffa").toCharArray(), ( 2250888a09821a98ac0680fad765217302858e70fa4Paul Duffin "\u0020\u00a0\u00ad\u0604\u061c\u06dd\u070f\u1680\u180e\u200f\u202f\u2064\u2066\u2067\u2068" 2260888a09821a98ac0680fad765217302858e70fa4Paul Duffin + "\u2069\u206f\u3000\uf8ff\ufeff\ufff9\ufffb").toCharArray()); 2277dd252788645e940eada959bdde927426e2531c9Paul Duffin 2287dd252788645e940eada959bdde927426e2531c9Paul Duffin private static String showCharacter(char c) { 2297dd252788645e940eada959bdde927426e2531c9Paul Duffin String hex = "0123456789ABCDEF"; 2300888a09821a98ac0680fad765217302858e70fa4Paul Duffin char[] tmp = {'\\', 'u', '\0', 
'\0', '\0', '\0'}; 2317dd252788645e940eada959bdde927426e2531c9Paul Duffin for (int i = 0; i < 4; i++) { 2327dd252788645e940eada959bdde927426e2531c9Paul Duffin tmp[5 - i] = hex.charAt(c & 0xF); 2337dd252788645e940eada959bdde927426e2531c9Paul Duffin c >>= 4; 2347dd252788645e940eada959bdde927426e2531c9Paul Duffin } 2357dd252788645e940eada959bdde927426e2531c9Paul Duffin return String.copyValueOf(tmp); 2367dd252788645e940eada959bdde927426e2531c9Paul Duffin 2377dd252788645e940eada959bdde927426e2531c9Paul Duffin } 238bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 239bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 2401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Determines whether a character is single-width (not double-width). When in doubt, this matcher 2411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * errs on the side of returning {@code false} (that is, it tends to assume a character is 2421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * double-width). 243bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 2441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to 2451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * date. 246bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 2477dd252788645e940eada959bdde927426e2531c9Paul Duffin public static final CharMatcher SINGLE_WIDTH = new RangesMatcher("CharMatcher.SINGLE_WIDTH", 2487dd252788645e940eada959bdde927426e2531c9Paul Duffin "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(), 2497dd252788645e940eada959bdde927426e2531c9Paul Duffin "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray()); 250bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 251bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** Matches any character. 
*/ 2520888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static final CharMatcher ANY = 2530888a09821a98ac0680fad765217302858e70fa4Paul Duffin new FastMatcher("CharMatcher.ANY") { 2540888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 2550888a09821a98ac0680fad765217302858e70fa4Paul Duffin return true; 2560888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2571d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 2580888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int indexIn(CharSequence sequence) { 2590888a09821a98ac0680fad765217302858e70fa4Paul Duffin return (sequence.length() == 0) ? -1 : 0; 2600888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 2620888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int indexIn(CharSequence sequence, int start) { 2630888a09821a98ac0680fad765217302858e70fa4Paul Duffin int length = sequence.length(); 2640888a09821a98ac0680fad765217302858e70fa4Paul Duffin Preconditions.checkPositionIndex(start, length); 2650888a09821a98ac0680fad765217302858e70fa4Paul Duffin return (start == length) ? 
-1 : start; 2660888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2671d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 2680888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int lastIndexIn(CharSequence sequence) { 2690888a09821a98ac0680fad765217302858e70fa4Paul Duffin return sequence.length() - 1; 2700888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 2720888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matchesAllOf(CharSequence sequence) { 2730888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(sequence); 2740888a09821a98ac0680fad765217302858e70fa4Paul Duffin return true; 2750888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2761d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 2770888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matchesNoneOf(CharSequence sequence) { 2780888a09821a98ac0680fad765217302858e70fa4Paul Duffin return sequence.length() == 0; 2790888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 2810888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String removeFrom(CharSequence sequence) { 2820888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(sequence); 2830888a09821a98ac0680fad765217302858e70fa4Paul Duffin return ""; 2840888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 2860888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String replaceFrom(CharSequence sequence, char replacement) { 2870888a09821a98ac0680fad765217302858e70fa4Paul Duffin char[] array = new char[sequence.length()]; 2880888a09821a98ac0680fad765217302858e70fa4Paul Duffin Arrays.fill(array, replacement); 2890888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new String(array); 2900888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 
2911d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 2920888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) { 2930888a09821a98ac0680fad765217302858e70fa4Paul Duffin StringBuilder retval = new StringBuilder(sequence.length() * replacement.length()); 2940888a09821a98ac0680fad765217302858e70fa4Paul Duffin for (int i = 0; i < sequence.length(); i++) { 2950888a09821a98ac0680fad765217302858e70fa4Paul Duffin retval.append(replacement); 2960888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2970888a09821a98ac0680fad765217302858e70fa4Paul Duffin return retval.toString(); 2980888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 2991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3000888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String collapseFrom(CharSequence sequence, char replacement) { 3010888a09821a98ac0680fad765217302858e70fa4Paul Duffin return (sequence.length() == 0) ? "" : String.valueOf(replacement); 3020888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3031d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3040888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String trimFrom(CharSequence sequence) { 3050888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(sequence); 3060888a09821a98ac0680fad765217302858e70fa4Paul Duffin return ""; 3070888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3081d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3090888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int countIn(CharSequence sequence) { 3100888a09821a98ac0680fad765217302858e70fa4Paul Duffin return sequence.length(); 3110888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3130888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher and(CharMatcher other) { 3140888a09821a98ac0680fad765217302858e70fa4Paul Duffin return 
checkNotNull(other); 3150888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3161d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3170888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher or(CharMatcher other) { 3180888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(other); 3190888a09821a98ac0680fad765217302858e70fa4Paul Duffin return this; 3200888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 321dbd967a6e5c96cc1a97c5521f88dc1564ba2f81bPaul Duffin 3220888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher negate() { 3230888a09821a98ac0680fad765217302858e70fa4Paul Duffin return NONE; 3240888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3250888a09821a98ac0680fad765217302858e70fa4Paul Duffin }; 326bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 327bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** Matches no characters. */ 3280888a09821a98ac0680fad765217302858e70fa4Paul Duffin public static final CharMatcher NONE = 3290888a09821a98ac0680fad765217302858e70fa4Paul Duffin new FastMatcher("CharMatcher.NONE") { 3300888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 3310888a09821a98ac0680fad765217302858e70fa4Paul Duffin return false; 3320888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3340888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int indexIn(CharSequence sequence) { 3350888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(sequence); 3360888a09821a98ac0680fad765217302858e70fa4Paul Duffin return -1; 3370888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3390888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int indexIn(CharSequence sequence, int start) { 3400888a09821a98ac0680fad765217302858e70fa4Paul Duffin int length = sequence.length(); 
3410888a09821a98ac0680fad765217302858e70fa4Paul Duffin Preconditions.checkPositionIndex(start, length); 3420888a09821a98ac0680fad765217302858e70fa4Paul Duffin return -1; 3430888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3450888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int lastIndexIn(CharSequence sequence) { 3460888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(sequence); 3470888a09821a98ac0680fad765217302858e70fa4Paul Duffin return -1; 3480888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3500888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matchesAllOf(CharSequence sequence) { 3510888a09821a98ac0680fad765217302858e70fa4Paul Duffin return sequence.length() == 0; 3520888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3540888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matchesNoneOf(CharSequence sequence) { 3550888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(sequence); 3560888a09821a98ac0680fad765217302858e70fa4Paul Duffin return true; 3570888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3590888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String removeFrom(CharSequence sequence) { 3600888a09821a98ac0680fad765217302858e70fa4Paul Duffin return sequence.toString(); 3610888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3630888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String replaceFrom(CharSequence sequence, char replacement) { 3640888a09821a98ac0680fad765217302858e70fa4Paul Duffin return sequence.toString(); 3650888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn 
Bringert 3670888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) { 3680888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(replacement); 3690888a09821a98ac0680fad765217302858e70fa4Paul Duffin return sequence.toString(); 3700888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3720888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String collapseFrom(CharSequence sequence, char replacement) { 3730888a09821a98ac0680fad765217302858e70fa4Paul Duffin return sequence.toString(); 3740888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3751d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3760888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String trimFrom(CharSequence sequence) { 3770888a09821a98ac0680fad765217302858e70fa4Paul Duffin return sequence.toString(); 3780888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3791d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3800888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override 3810888a09821a98ac0680fad765217302858e70fa4Paul Duffin public String trimLeadingFrom(CharSequence sequence) { 3820888a09821a98ac0680fad765217302858e70fa4Paul Duffin return sequence.toString(); 3830888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3841d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 3850888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override 3860888a09821a98ac0680fad765217302858e70fa4Paul Duffin public String trimTrailingFrom(CharSequence sequence) { 3870888a09821a98ac0680fad765217302858e70fa4Paul Duffin return sequence.toString(); 3880888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 389dbd967a6e5c96cc1a97c5521f88dc1564ba2f81bPaul Duffin 3900888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public int countIn(CharSequence sequence) { 3910888a09821a98ac0680fad765217302858e70fa4Paul Duffin 
checkNotNull(sequence); 3920888a09821a98ac0680fad765217302858e70fa4Paul Duffin return 0; 3930888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 394dbd967a6e5c96cc1a97c5521f88dc1564ba2f81bPaul Duffin 3950888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher and(CharMatcher other) { 3960888a09821a98ac0680fad765217302858e70fa4Paul Duffin checkNotNull(other); 3970888a09821a98ac0680fad765217302858e70fa4Paul Duffin return this; 3980888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 3990888a09821a98ac0680fad765217302858e70fa4Paul Duffin 4000888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher or(CharMatcher other) { 4010888a09821a98ac0680fad765217302858e70fa4Paul Duffin return checkNotNull(other); 4020888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 4030888a09821a98ac0680fad765217302858e70fa4Paul Duffin 4040888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher negate() { 4050888a09821a98ac0680fad765217302858e70fa4Paul Duffin return ANY; 4060888a09821a98ac0680fad765217302858e70fa4Paul Duffin } 4070888a09821a98ac0680fad765217302858e70fa4Paul Duffin }; 408bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 409bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor // Static factories 410bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 411bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 412bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * Returns a {@code char} matcher that matches only one specified character. 
413bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 414bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public static CharMatcher is(final char match) { 4157dd252788645e940eada959bdde927426e2531c9Paul Duffin String description = "CharMatcher.is('" + showCharacter(match) + "')"; 4167dd252788645e940eada959bdde927426e2531c9Paul Duffin return new FastMatcher(description) { 4170888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 418bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return c == match; 419bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 420bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 4210888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public String replaceFrom(CharSequence sequence, char replacement) { 422bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return sequence.toString().replace(match, replacement); 423bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 4241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 4250888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher and(CharMatcher other) { 426bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return other.matches(match) ? this : NONE; 427bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 4281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 4290888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher or(CharMatcher other) { 430bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return other.matches(match) ? 
other : super.or(other); 431bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 4321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 4330888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher negate() { 434bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return isNot(match); 435bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 4361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 4377dd252788645e940eada959bdde927426e2531c9Paul Duffin @GwtIncompatible("java.util.BitSet") 4380888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override 4397dd252788645e940eada959bdde927426e2531c9Paul Duffin void setBits(BitSet table) { 440bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor table.set(match); 441bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 442bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor }; 443bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 444bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 445bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 4461d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a {@code char} matcher that matches any character except the one specified. 447bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 448bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * <p>To negate another {@code CharMatcher}, use {@link #negate()}. 
449bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 450bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public static CharMatcher isNot(final char match) { 4510888a09821a98ac0680fad765217302858e70fa4Paul Duffin String description = "CharMatcher.isNot('" + showCharacter(match) + "')"; 4527dd252788645e940eada959bdde927426e2531c9Paul Duffin return new FastMatcher(description) { 4530888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 454bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return c != match; 455bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 456bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 4570888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher and(CharMatcher other) { 458bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return other.matches(match) ? super.and(other) : other; 459bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 4601d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 4610888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher or(CharMatcher other) { 462bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return other.matches(match) ? 
ANY : this; 463bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 4641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 4657dd252788645e940eada959bdde927426e2531c9Paul Duffin @GwtIncompatible("java.util.BitSet") 4660888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override 4677dd252788645e940eada959bdde927426e2531c9Paul Duffin void setBits(BitSet table) { 4687dd252788645e940eada959bdde927426e2531c9Paul Duffin table.set(0, match); 4697dd252788645e940eada959bdde927426e2531c9Paul Duffin table.set(match + 1, Character.MAX_VALUE + 1); 4707dd252788645e940eada959bdde927426e2531c9Paul Duffin } 4717dd252788645e940eada959bdde927426e2531c9Paul Duffin 4720888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public CharMatcher negate() { 473bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return is(match); 474bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 475bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor }; 476bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 477bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 478bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 4791d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a {@code char} matcher that matches any character present in the given character 4801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * sequence. 
481bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 482bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public static CharMatcher anyOf(final CharSequence sequence) { 483bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor switch (sequence.length()) { 484bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor case 0: 485bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return NONE; 486bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor case 1: 487bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return is(sequence.charAt(0)); 488bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor case 2: 4897dd252788645e940eada959bdde927426e2531c9Paul Duffin return isEither(sequence.charAt(0), sequence.charAt(1)); 4907dd252788645e940eada959bdde927426e2531c9Paul Duffin default: 4917dd252788645e940eada959bdde927426e2531c9Paul Duffin // continue below to handle the general case 492bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 4937dd252788645e940eada959bdde927426e2531c9Paul Duffin // TODO(user): is it potentially worth just going ahead and building a precomputed matcher? 
494bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor final char[] chars = sequence.toString().toCharArray(); 4957dd252788645e940eada959bdde927426e2531c9Paul Duffin Arrays.sort(chars); 4967dd252788645e940eada959bdde927426e2531c9Paul Duffin StringBuilder description = new StringBuilder("CharMatcher.anyOf(\""); 4977dd252788645e940eada959bdde927426e2531c9Paul Duffin for (char c : chars) { 4987dd252788645e940eada959bdde927426e2531c9Paul Duffin description.append(showCharacter(c)); 4997dd252788645e940eada959bdde927426e2531c9Paul Duffin } 5007dd252788645e940eada959bdde927426e2531c9Paul Duffin description.append("\")"); 5017dd252788645e940eada959bdde927426e2531c9Paul Duffin return new CharMatcher(description.toString()) { 5020888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 503bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return Arrays.binarySearch(chars, c) >= 0; 504bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 5051d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 5067dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 5077dd252788645e940eada959bdde927426e2531c9Paul Duffin @GwtIncompatible("java.util.BitSet") 5087dd252788645e940eada959bdde927426e2531c9Paul Duffin void setBits(BitSet table) { 509bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor for (char c : chars) { 510bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor table.set(c); 511bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 512bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 513bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor }; 514bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 515bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 5160888a09821a98ac0680fad765217302858e70fa4Paul Duffin private static CharMatcher isEither( 5170888a09821a98ac0680fad765217302858e70fa4Paul Duffin final char match1, 5180888a09821a98ac0680fad765217302858e70fa4Paul Duffin final char match2) { 5190888a09821a98ac0680fad765217302858e70fa4Paul Duffin String 
description = "CharMatcher.anyOf(\"" + 5200888a09821a98ac0680fad765217302858e70fa4Paul Duffin showCharacter(match1) + showCharacter(match2) + "\")"; 5217dd252788645e940eada959bdde927426e2531c9Paul Duffin return new FastMatcher(description) { 5220888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 5237dd252788645e940eada959bdde927426e2531c9Paul Duffin return c == match1 || c == match2; 5247dd252788645e940eada959bdde927426e2531c9Paul Duffin } 5257dd252788645e940eada959bdde927426e2531c9Paul Duffin 5267dd252788645e940eada959bdde927426e2531c9Paul Duffin @GwtIncompatible("java.util.BitSet") 5270888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override void setBits(BitSet table) { 5287dd252788645e940eada959bdde927426e2531c9Paul Duffin table.set(match1); 5297dd252788645e940eada959bdde927426e2531c9Paul Duffin table.set(match2); 5307dd252788645e940eada959bdde927426e2531c9Paul Duffin } 5317dd252788645e940eada959bdde927426e2531c9Paul Duffin }; 5327dd252788645e940eada959bdde927426e2531c9Paul Duffin } 5337dd252788645e940eada959bdde927426e2531c9Paul Duffin 534bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 5351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a {@code char} matcher that matches any character not present in the given character 5361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * sequence. 
537bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 538bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public static CharMatcher noneOf(CharSequence sequence) { 539bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return anyOf(sequence).negate(); 540bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 541bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 542bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 5431d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a {@code char} matcher that matches any character in a given range (both endpoints are 5441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code 5451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * CharMatcher.inRange('a', 'z')}. 546bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 547bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @throws IllegalArgumentException if {@code endInclusive < startInclusive} 548bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 5491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert public static CharMatcher inRange(final char startInclusive, final char endInclusive) { 550bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor checkArgument(endInclusive >= startInclusive); 5510888a09821a98ac0680fad765217302858e70fa4Paul Duffin String description = "CharMatcher.inRange('" + 5520888a09821a98ac0680fad765217302858e70fa4Paul Duffin showCharacter(startInclusive) + "', '" + 5530888a09821a98ac0680fad765217302858e70fa4Paul Duffin showCharacter(endInclusive) + "')"; 5547dd252788645e940eada959bdde927426e2531c9Paul Duffin return inRange(startInclusive, endInclusive, description); 5557dd252788645e940eada959bdde927426e2531c9Paul Duffin } 5561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 5570888a09821a98ac0680fad765217302858e70fa4Paul Duffin static CharMatcher inRange(final char startInclusive, final char endInclusive, 
5580888a09821a98ac0680fad765217302858e70fa4Paul Duffin String description) { 5597dd252788645e940eada959bdde927426e2531c9Paul Duffin return new FastMatcher(description) { 5600888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 5617dd252788645e940eada959bdde927426e2531c9Paul Duffin return startInclusive <= c && c <= endInclusive; 562dbd967a6e5c96cc1a97c5521f88dc1564ba2f81bPaul Duffin } 563dbd967a6e5c96cc1a97c5521f88dc1564ba2f81bPaul Duffin 5647dd252788645e940eada959bdde927426e2531c9Paul Duffin @GwtIncompatible("java.util.BitSet") 5650888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override void setBits(BitSet table) { 5667dd252788645e940eada959bdde927426e2531c9Paul Duffin table.set(startInclusive, endInclusive + 1); 567bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 568bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor }; 569bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 570bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 571bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 5721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but 5731d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * which operates on primitive {@code char} instances instead. 574bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 5751d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert public static CharMatcher forPredicate(final Predicate<? 
super Character> predicate) { 576bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor checkNotNull(predicate); 577bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (predicate instanceof CharMatcher) { 578bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return (CharMatcher) predicate; 579bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 5807dd252788645e940eada959bdde927426e2531c9Paul Duffin String description = "CharMatcher.forPredicate(" + predicate + ")"; 5817dd252788645e940eada959bdde927426e2531c9Paul Duffin return new CharMatcher(description) { 5820888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 583bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return predicate.apply(c); 584bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 5851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 5860888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean apply(Character character) { 587bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return predicate.apply(checkNotNull(character)); 588bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 589bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor }; 590bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 591bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 5927dd252788645e940eada959bdde927426e2531c9Paul Duffin // State 5937dd252788645e940eada959bdde927426e2531c9Paul Duffin final String description; 5947dd252788645e940eada959bdde927426e2531c9Paul Duffin 5951d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert // Constructors 5961d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 5971d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /** 5987dd252788645e940eada959bdde927426e2531c9Paul Duffin * Sets the {@code toString()} from the given description. 
5991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert */ 6007dd252788645e940eada959bdde927426e2531c9Paul Duffin CharMatcher(String description) { 6017dd252788645e940eada959bdde927426e2531c9Paul Duffin this.description = description; 6027dd252788645e940eada959bdde927426e2531c9Paul Duffin } 6037dd252788645e940eada959bdde927426e2531c9Paul Duffin 6047dd252788645e940eada959bdde927426e2531c9Paul Duffin /** 6057dd252788645e940eada959bdde927426e2531c9Paul Duffin * Constructor for use by subclasses. When subclassing, you may want to override 6067dd252788645e940eada959bdde927426e2531c9Paul Duffin * {@code toString()} to provide a useful description. 6077dd252788645e940eada959bdde927426e2531c9Paul Duffin */ 6087dd252788645e940eada959bdde927426e2531c9Paul Duffin protected CharMatcher() { 6097dd252788645e940eada959bdde927426e2531c9Paul Duffin description = super.toString(); 6107dd252788645e940eada959bdde927426e2531c9Paul Duffin } 6111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 612bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor // Abstract methods 613bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 614bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** Determines a true or false value for the given character. */ 615bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public abstract boolean matches(char c); 616bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 617bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor // Non-static factories 618bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 619bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 620bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * Returns a matcher that matches any character not matched by this matcher. 
*/
  public CharMatcher negate() {
    return new NegatedMatcher(this);
  }

  /**
   * Matcher that inverts the answer of a wrapped matcher. Bulk queries are answered by delegating
   * to the complementary operation on the wrapped matcher.
   */
  private static class NegatedMatcher extends CharMatcher {
    final CharMatcher original;

    NegatedMatcher(String toString, CharMatcher original) {
      super(toString);
      this.original = original;
    }

    NegatedMatcher(CharMatcher original) {
      this(original + ".negate()", original);
    }

    @Override public boolean matches(char c) {
      return !original.matches(c);
    }

    @Override public boolean matchesAllOf(CharSequence sequence) {
      return original.matchesNoneOf(sequence);
    }

    @Override public boolean matchesNoneOf(CharSequence sequence) {
      return original.matchesAllOf(sequence);
    }

    @Override public int countIn(CharSequence sequence) {
      return sequence.length() - original.countIn(sequence);
    }

    @GwtIncompatible("java.util.BitSet")
    @Override
    void setBits(BitSet table) {
      // Work on a scratch set so that bits already present in table are not flipped.
      BitSet tmp = new BitSet();
      original.setBits(tmp);
      tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1);
      table.or(tmp);
    }

    /** Negating a negation hands back the wrapped matcher itself. */
    @Override public CharMatcher negate() {
      return original;
    }

    @Override
    CharMatcher withToString(String description) {
      return new NegatedMatcher(description, original);
    }
  }
673bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 6741d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a matcher that matches any character matched by both this matcher and {@code other}. 675bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 676bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public CharMatcher and(CharMatcher other) { 6777dd252788645e940eada959bdde927426e2531c9Paul Duffin return new And(this, checkNotNull(other)); 678bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 679bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 680bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor private static class And extends CharMatcher { 6817dd252788645e940eada959bdde927426e2531c9Paul Duffin final CharMatcher first; 6827dd252788645e940eada959bdde927426e2531c9Paul Duffin final CharMatcher second; 683bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 6847dd252788645e940eada959bdde927426e2531c9Paul Duffin And(CharMatcher a, CharMatcher b) { 6857dd252788645e940eada959bdde927426e2531c9Paul Duffin this(a, b, "CharMatcher.and(" + a + ", " + b + ")"); 686bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 687bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 6887dd252788645e940eada959bdde927426e2531c9Paul Duffin And(CharMatcher a, CharMatcher b, String description) { 6897dd252788645e940eada959bdde927426e2531c9Paul Duffin super(description); 6907dd252788645e940eada959bdde927426e2531c9Paul Duffin first = checkNotNull(a); 6917dd252788645e940eada959bdde927426e2531c9Paul Duffin second = checkNotNull(b); 6927dd252788645e940eada959bdde927426e2531c9Paul Duffin } 6937dd252788645e940eada959bdde927426e2531c9Paul Duffin 6947dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 6957dd252788645e940eada959bdde927426e2531c9Paul Duffin public boolean matches(char c) { 6967dd252788645e940eada959bdde927426e2531c9Paul Duffin return first.matches(c) && second.matches(c); 6977dd252788645e940eada959bdde927426e2531c9Paul Duffin } 
6987dd252788645e940eada959bdde927426e2531c9Paul Duffin 6997dd252788645e940eada959bdde927426e2531c9Paul Duffin @GwtIncompatible("java.util.BitSet") 7000888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override 7017dd252788645e940eada959bdde927426e2531c9Paul Duffin void setBits(BitSet table) { 7027dd252788645e940eada959bdde927426e2531c9Paul Duffin BitSet tmp1 = new BitSet(); 7037dd252788645e940eada959bdde927426e2531c9Paul Duffin first.setBits(tmp1); 7047dd252788645e940eada959bdde927426e2531c9Paul Duffin BitSet tmp2 = new BitSet(); 7057dd252788645e940eada959bdde927426e2531c9Paul Duffin second.setBits(tmp2); 7067dd252788645e940eada959bdde927426e2531c9Paul Duffin tmp1.and(tmp2); 7077dd252788645e940eada959bdde927426e2531c9Paul Duffin table.or(tmp1); 708bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 709bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 7107dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 7117dd252788645e940eada959bdde927426e2531c9Paul Duffin CharMatcher withToString(String description) { 7127dd252788645e940eada959bdde927426e2531c9Paul Duffin return new And(first, second, description); 713bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 714bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 715bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 716bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 7171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a matcher that matches any character matched by either this matcher or {@code other}. 
718bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 719bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public CharMatcher or(CharMatcher other) { 7207dd252788645e940eada959bdde927426e2531c9Paul Duffin return new Or(this, checkNotNull(other)); 721bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 722bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 723bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor private static class Or extends CharMatcher { 7247dd252788645e940eada959bdde927426e2531c9Paul Duffin final CharMatcher first; 7257dd252788645e940eada959bdde927426e2531c9Paul Duffin final CharMatcher second; 726bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 7277dd252788645e940eada959bdde927426e2531c9Paul Duffin Or(CharMatcher a, CharMatcher b, String description) { 7287dd252788645e940eada959bdde927426e2531c9Paul Duffin super(description); 7297dd252788645e940eada959bdde927426e2531c9Paul Duffin first = checkNotNull(a); 7307dd252788645e940eada959bdde927426e2531c9Paul Duffin second = checkNotNull(b); 731bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 732bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 7337dd252788645e940eada959bdde927426e2531c9Paul Duffin Or(CharMatcher a, CharMatcher b) { 7347dd252788645e940eada959bdde927426e2531c9Paul Duffin this(a, b, "CharMatcher.or(" + a + ", " + b + ")"); 735bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 736bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 7377dd252788645e940eada959bdde927426e2531c9Paul Duffin @GwtIncompatible("java.util.BitSet") 7380888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override 7397dd252788645e940eada959bdde927426e2531c9Paul Duffin void setBits(BitSet table) { 7407dd252788645e940eada959bdde927426e2531c9Paul Duffin first.setBits(table); 7417dd252788645e940eada959bdde927426e2531c9Paul Duffin second.setBits(table); 7423c77433663281544363151bf284b0240dfd22a42Paul Duffin } 7433c77433663281544363151bf284b0240dfd22a42Paul Duffin 7447dd252788645e940eada959bdde927426e2531c9Paul Duffin 
@Override 7457dd252788645e940eada959bdde927426e2531c9Paul Duffin public boolean matches(char c) { 7467dd252788645e940eada959bdde927426e2531c9Paul Duffin return first.matches(c) || second.matches(c); 7477dd252788645e940eada959bdde927426e2531c9Paul Duffin } 7487dd252788645e940eada959bdde927426e2531c9Paul Duffin 7497dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 7507dd252788645e940eada959bdde927426e2531c9Paul Duffin CharMatcher withToString(String description) { 7517dd252788645e940eada959bdde927426e2531c9Paul Duffin return new Or(first, second, description); 752bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 753bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 754bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 755bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 7561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to 7571d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * query than the original; your mileage may vary. Precomputation takes time and is likely to be 7581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * worthwhile only if the precomputed matcher is queried many thousands of times. 759bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 7601d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a 7611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a 7621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * worthwhile tradeoff in a browser. 
763bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 764bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public CharMatcher precomputed() { 765bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return Platform.precomputeCharMatcher(this); 766bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 767bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 768bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 7697dd252788645e940eada959bdde927426e2531c9Paul Duffin * Subclasses should provide a new CharMatcher with the same characteristics as {@code this}, 7707dd252788645e940eada959bdde927426e2531c9Paul Duffin * but with their {@code toString} method overridden with the new description. 771bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 7727dd252788645e940eada959bdde927426e2531c9Paul Duffin * <p>This is unsupported by default. 7737dd252788645e940eada959bdde927426e2531c9Paul Duffin */ 7747dd252788645e940eada959bdde927426e2531c9Paul Duffin CharMatcher withToString(String description) { 7757dd252788645e940eada959bdde927426e2531c9Paul Duffin throw new UnsupportedOperationException(); 7767dd252788645e940eada959bdde927426e2531c9Paul Duffin } 7777dd252788645e940eada959bdde927426e2531c9Paul Duffin 7787dd252788645e940eada959bdde927426e2531c9Paul Duffin private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1; 7797dd252788645e940eada959bdde927426e2531c9Paul Duffin 7807dd252788645e940eada959bdde927426e2531c9Paul Duffin /** 7817dd252788645e940eada959bdde927426e2531c9Paul Duffin * This is the actual implementation of {@link #precomputed}, but we bounce calls through a 7827dd252788645e940eada959bdde927426e2531c9Paul Duffin * method on {@link Platform} so that we can have different behavior in GWT. 783bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 7847dd252788645e940eada959bdde927426e2531c9Paul Duffin * <p>This implementation tries to be smart in a number of ways. 
It recognizes cases where 7857dd252788645e940eada959bdde927426e2531c9Paul Duffin * the negation is cheaper to precompute than the matcher itself; it tries to build small 7867dd252788645e940eada959bdde927426e2531c9Paul Duffin * hash tables for matchers that only match a few characters, and so on. In the worst-case 7877dd252788645e940eada959bdde927426e2531c9Paul Duffin * scenario, it constructs an eight-kilobyte bit array and queries that. 7887dd252788645e940eada959bdde927426e2531c9Paul Duffin * In many situations this produces a matcher which is faster to query than the original. 789bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 7907dd252788645e940eada959bdde927426e2531c9Paul Duffin @GwtIncompatible("java.util.BitSet") 791bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor CharMatcher precomputedInternal() { 7927dd252788645e940eada959bdde927426e2531c9Paul Duffin final BitSet table = new BitSet(); 793bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor setBits(table); 7947dd252788645e940eada959bdde927426e2531c9Paul Duffin int totalCharacters = table.cardinality(); 7957dd252788645e940eada959bdde927426e2531c9Paul Duffin if (totalCharacters * 2 <= DISTINCT_CHARS) { 7967dd252788645e940eada959bdde927426e2531c9Paul Duffin return precomputedPositive(totalCharacters, table, description); 7977dd252788645e940eada959bdde927426e2531c9Paul Duffin } else { 7987dd252788645e940eada959bdde927426e2531c9Paul Duffin // TODO(user): is it worth it to worry about the last character of large matchers? 7997dd252788645e940eada959bdde927426e2531c9Paul Duffin table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1); 8007dd252788645e940eada959bdde927426e2531c9Paul Duffin int negatedCharacters = DISTINCT_CHARS - totalCharacters; 8010888a09821a98ac0680fad765217302858e70fa4Paul Duffin String suffix = ".negate()"; 8020888a09821a98ac0680fad765217302858e70fa4Paul Duffin String negatedDescription = description.endsWith(suffix) 8030888a09821a98ac0680fad765217302858e70fa4Paul Duffin ? 
description.substring(0, description.length() - suffix.length()) 8040888a09821a98ac0680fad765217302858e70fa4Paul Duffin : description + suffix; 8050888a09821a98ac0680fad765217302858e70fa4Paul Duffin return new NegatedFastMatcher(toString(), 8060888a09821a98ac0680fad765217302858e70fa4Paul Duffin precomputedPositive(negatedCharacters, table, negatedDescription)); 8077dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8087dd252788645e940eada959bdde927426e2531c9Paul Duffin } 809bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 8107dd252788645e940eada959bdde927426e2531c9Paul Duffin /** 8117dd252788645e940eada959bdde927426e2531c9Paul Duffin * A matcher for which precomputation will not yield any significant benefit. 8127dd252788645e940eada959bdde927426e2531c9Paul Duffin */ 8137dd252788645e940eada959bdde927426e2531c9Paul Duffin abstract static class FastMatcher extends CharMatcher { 8147dd252788645e940eada959bdde927426e2531c9Paul Duffin FastMatcher() { 8157dd252788645e940eada959bdde927426e2531c9Paul Duffin super(); 8167dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8173c77433663281544363151bf284b0240dfd22a42Paul Duffin 8187dd252788645e940eada959bdde927426e2531c9Paul Duffin FastMatcher(String description) { 8197dd252788645e940eada959bdde927426e2531c9Paul Duffin super(description); 8207dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8213c77433663281544363151bf284b0240dfd22a42Paul Duffin 8227dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 8237dd252788645e940eada959bdde927426e2531c9Paul Duffin public final CharMatcher precomputed() { 8247dd252788645e940eada959bdde927426e2531c9Paul Duffin return this; 8257dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8267dd252788645e940eada959bdde927426e2531c9Paul Duffin 8277dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 8287dd252788645e940eada959bdde927426e2531c9Paul Duffin public CharMatcher negate() { 8297dd252788645e940eada959bdde927426e2531c9Paul Duffin return new NegatedFastMatcher(this); 
8307dd252788645e940eada959bdde927426e2531c9Paul Duffin } 831bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 832bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 8337dd252788645e940eada959bdde927426e2531c9Paul Duffin static final class NegatedFastMatcher extends NegatedMatcher { 8347dd252788645e940eada959bdde927426e2531c9Paul Duffin NegatedFastMatcher(CharMatcher original) { 8357dd252788645e940eada959bdde927426e2531c9Paul Duffin super(original); 8367dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8377dd252788645e940eada959bdde927426e2531c9Paul Duffin 8387dd252788645e940eada959bdde927426e2531c9Paul Duffin NegatedFastMatcher(String toString, CharMatcher original) { 8397dd252788645e940eada959bdde927426e2531c9Paul Duffin super(toString, original); 8407dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8417dd252788645e940eada959bdde927426e2531c9Paul Duffin 8427dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 8437dd252788645e940eada959bdde927426e2531c9Paul Duffin public final CharMatcher precomputed() { 8447dd252788645e940eada959bdde927426e2531c9Paul Duffin return this; 8457dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8467dd252788645e940eada959bdde927426e2531c9Paul Duffin 8477dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 8487dd252788645e940eada959bdde927426e2531c9Paul Duffin CharMatcher withToString(String description) { 8497dd252788645e940eada959bdde927426e2531c9Paul Duffin return new NegatedFastMatcher(description, original); 8503c77433663281544363151bf284b0240dfd22a42Paul Duffin } 8513c77433663281544363151bf284b0240dfd22a42Paul Duffin } 8523c77433663281544363151bf284b0240dfd22a42Paul Duffin 8533c77433663281544363151bf284b0240dfd22a42Paul Duffin /** 8547dd252788645e940eada959bdde927426e2531c9Paul Duffin * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper. 
8553c77433663281544363151bf284b0240dfd22a42Paul Duffin */ 8567dd252788645e940eada959bdde927426e2531c9Paul Duffin @GwtIncompatible("java.util.BitSet") 8570888a09821a98ac0680fad765217302858e70fa4Paul Duffin private static CharMatcher precomputedPositive( 8580888a09821a98ac0680fad765217302858e70fa4Paul Duffin int totalCharacters, 8590888a09821a98ac0680fad765217302858e70fa4Paul Duffin BitSet table, 8607dd252788645e940eada959bdde927426e2531c9Paul Duffin String description) { 8617dd252788645e940eada959bdde927426e2531c9Paul Duffin switch (totalCharacters) { 8627dd252788645e940eada959bdde927426e2531c9Paul Duffin case 0: 8637dd252788645e940eada959bdde927426e2531c9Paul Duffin return NONE; 8647dd252788645e940eada959bdde927426e2531c9Paul Duffin case 1: 8657dd252788645e940eada959bdde927426e2531c9Paul Duffin return is((char) table.nextSetBit(0)); 8667dd252788645e940eada959bdde927426e2531c9Paul Duffin case 2: 8677dd252788645e940eada959bdde927426e2531c9Paul Duffin char c1 = (char) table.nextSetBit(0); 8687dd252788645e940eada959bdde927426e2531c9Paul Duffin char c2 = (char) table.nextSetBit(c1 + 1); 8697dd252788645e940eada959bdde927426e2531c9Paul Duffin return isEither(c1, c2); 8707dd252788645e940eada959bdde927426e2531c9Paul Duffin default: 8717dd252788645e940eada959bdde927426e2531c9Paul Duffin return isSmall(totalCharacters, table.length()) 8727dd252788645e940eada959bdde927426e2531c9Paul Duffin ? 
SmallCharMatcher.from(table, description) 8737dd252788645e940eada959bdde927426e2531c9Paul Duffin : new BitSetMatcher(table, description); 8747dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8757dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8767dd252788645e940eada959bdde927426e2531c9Paul Duffin 8770888a09821a98ac0680fad765217302858e70fa4Paul Duffin @GwtIncompatible("SmallCharMatcher") 8787dd252788645e940eada959bdde927426e2531c9Paul Duffin private static boolean isSmall(int totalCharacters, int tableLength) { 8797dd252788645e940eada959bdde927426e2531c9Paul Duffin return totalCharacters <= SmallCharMatcher.MAX_SIZE 8800888a09821a98ac0680fad765217302858e70fa4Paul Duffin && tableLength > (totalCharacters * 4 * Character.SIZE); 8810888a09821a98ac0680fad765217302858e70fa4Paul Duffin // err on the side of BitSetMatcher 8827dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8837dd252788645e940eada959bdde927426e2531c9Paul Duffin 8847dd252788645e940eada959bdde927426e2531c9Paul Duffin @GwtIncompatible("java.util.BitSet") 8857dd252788645e940eada959bdde927426e2531c9Paul Duffin private static class BitSetMatcher extends FastMatcher { 8867dd252788645e940eada959bdde927426e2531c9Paul Duffin private final BitSet table; 887dbd967a6e5c96cc1a97c5521f88dc1564ba2f81bPaul Duffin 8887dd252788645e940eada959bdde927426e2531c9Paul Duffin private BitSetMatcher(BitSet table, String description) { 8897dd252788645e940eada959bdde927426e2531c9Paul Duffin super(description); 8907dd252788645e940eada959bdde927426e2531c9Paul Duffin if (table.length() + Long.SIZE < table.size()) { 8917dd252788645e940eada959bdde927426e2531c9Paul Duffin table = (BitSet) table.clone(); 8927dd252788645e940eada959bdde927426e2531c9Paul Duffin // If only we could actually call BitSet.trimToSize() ourselves... 
8937dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8947dd252788645e940eada959bdde927426e2531c9Paul Duffin this.table = table; 8957dd252788645e940eada959bdde927426e2531c9Paul Duffin } 8967dd252788645e940eada959bdde927426e2531c9Paul Duffin 8970888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override public boolean matches(char c) { 8987dd252788645e940eada959bdde927426e2531c9Paul Duffin return table.get(c); 899dbd967a6e5c96cc1a97c5521f88dc1564ba2f81bPaul Duffin } 900dbd967a6e5c96cc1a97c5521f88dc1564ba2f81bPaul Duffin 9017dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 9027dd252788645e940eada959bdde927426e2531c9Paul Duffin void setBits(BitSet bitSet) { 9037dd252788645e940eada959bdde927426e2531c9Paul Duffin bitSet.or(table); 9047dd252788645e940eada959bdde927426e2531c9Paul Duffin } 9057dd252788645e940eada959bdde927426e2531c9Paul Duffin } 9067dd252788645e940eada959bdde927426e2531c9Paul Duffin 9077dd252788645e940eada959bdde927426e2531c9Paul Duffin /** 9087dd252788645e940eada959bdde927426e2531c9Paul Duffin * Sets bits in {@code table} matched by this matcher. 
9097dd252788645e940eada959bdde927426e2531c9Paul Duffin */ 9107dd252788645e940eada959bdde927426e2531c9Paul Duffin @GwtIncompatible("java.util.BitSet") 9117dd252788645e940eada959bdde927426e2531c9Paul Duffin void setBits(BitSet table) { 9127dd252788645e940eada959bdde927426e2531c9Paul Duffin for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) { 9137dd252788645e940eada959bdde927426e2531c9Paul Duffin if (matches((char) c)) { 9147dd252788645e940eada959bdde927426e2531c9Paul Duffin table.set(c); 9157dd252788645e940eada959bdde927426e2531c9Paul Duffin } 916bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 917bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 918bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 919bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor // Text processing routines 920bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 921bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 9221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns {@code true} if a character sequence contains at least one matching character. 9231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Equivalent to {@code !matchesNoneOf(sequence)}. 9241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * 9251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 9261d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * character, until this returns {@code true} or the end is reached. 
9271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * 9281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @param sequence the character sequence to examine, possibly empty 9291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @return {@code true} if this matcher matches at least one character in the sequence 9301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @since 8.0 9311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert */ 9321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert public boolean matchesAnyOf(CharSequence sequence) { 9331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert return !matchesNoneOf(sequence); 9341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 9351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 9361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /** 9371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns {@code true} if a character sequence contains only matching characters. 938bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 9391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 9401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * character, until this returns {@code false} or the end is reached. 
941bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 942bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @param sequence the character sequence to examine, possibly empty 9431d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @return {@code true} if this matcher matches every character in the sequence, including when 9441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * the sequence is empty 945bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 946bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public boolean matchesAllOf(CharSequence sequence) { 947bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor for (int i = sequence.length() - 1; i >= 0; i--) { 948bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (!matches(sequence.charAt(i))) { 949bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return false; 950bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 951bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 952bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return true; 953bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 954bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 955bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 9561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns {@code true} if a character sequence contains no matching characters. Equivalent to 9571d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * {@code !matchesAnyOf(sequence)}. 958bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 9591d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 9601d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * character, until this returns {@code false} or the end is reached. 
961bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 962bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @param sequence the character sequence to examine, possibly empty 9631d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @return {@code true} if this matcher matches every character in the sequence, including when 9641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * the sequence is empty 965bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 966bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public boolean matchesNoneOf(CharSequence sequence) { 967bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return indexIn(sequence) == -1; 968bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 969bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 970bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 9711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns the index of the first matching character in a character sequence, or {@code -1} if no 9721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * matching character is present. 973bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 9741d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>The default implementation iterates over the sequence in forward order calling {@link 9751d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * #matches} for each character. 
976bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 977bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @param sequence the character sequence to examine from the beginning 978bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @return an index, or {@code -1} if no character matches 979bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 980bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public int indexIn(CharSequence sequence) { 981bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int length = sequence.length(); 982bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor for (int i = 0; i < length; i++) { 983bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (matches(sequence.charAt(i))) { 984bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return i; 985bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 986bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 987bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return -1; 988bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 989bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 990bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 9911d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns the index of the first matching character in a character sequence, starting from a 9921d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * given position, or {@code -1} if no character matches after that position. 993bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 9941d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>The default implementation iterates over the sequence in forward order, beginning at {@code 9951d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * start}, calling {@link #matches} for each character. 
996bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 997bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @param sequence the character sequence to examine 9981d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @param start the first index to examine; must be nonnegative and no greater than {@code 9991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * sequence.length()} 10001d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @return the index of the first matching character, guaranteed to be no less than {@code start}, 10011d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * or {@code -1} if no character matches 10021d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @throws IndexOutOfBoundsException if start is negative or greater than {@code 10031d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * sequence.length()} 1004bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 1005bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public int indexIn(CharSequence sequence, int start) { 1006bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int length = sequence.length(); 1007bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor Preconditions.checkPositionIndex(start, length); 1008bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor for (int i = start; i < length; i++) { 1009bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (matches(sequence.charAt(i))) { 1010bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return i; 1011bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1012bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1013bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return -1; 1014bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1015bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1016bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 10171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns the index of the last matching character in a character sequence, or {@code -1} if no 
10181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * matching character is present. 1019bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 10201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>The default implementation iterates over the sequence in reverse order calling {@link 10211d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * #matches} for each character. 1022bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1023bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @param sequence the character sequence to examine from the end 1024bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @return an index, or {@code -1} if no character matches 1025bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 1026bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public int lastIndexIn(CharSequence sequence) { 1027bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor for (int i = sequence.length() - 1; i >= 0; i--) { 1028bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (matches(sequence.charAt(i))) { 1029bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return i; 1030bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1031bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1032bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return -1; 1033bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1034bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1035bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 1036bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * Returns the number of matching characters found in a character sequence. 
1037bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 1038bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public int countIn(CharSequence sequence) { 1039bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int count = 0; 1040bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor for (int i = 0; i < sequence.length(); i++) { 1041bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (matches(sequence.charAt(i))) { 1042bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor count++; 1043bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1044bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1045bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return count; 1046bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1047bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1048bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 10491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a string containing all non-matching characters of a character sequence, in order. For 10501d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * example: <pre> {@code 1051bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1052bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * CharMatcher.is('a').removeFrom("bazaar")}</pre> 1053bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1054bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * ... returns {@code "bzr"}. 
1055bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 10561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert @CheckReturnValue 1057bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public String removeFrom(CharSequence sequence) { 1058bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor String string = sequence.toString(); 1059bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int pos = indexIn(string); 1060bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (pos == -1) { 1061bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return string; 1062bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1063bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1064bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor char[] chars = string.toCharArray(); 1065bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int spread = 1; 1066bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1067bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor // This unusual loop comes from extensive benchmarking 10681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert OUT: while (true) { 1069bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor pos++; 1070bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor while (true) { 1071bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (pos == chars.length) { 1072bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor break OUT; 1073bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1074bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (matches(chars[pos])) { 1075bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor break; 1076bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1077bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor chars[pos - spread] = chars[pos]; 1078bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor pos++; 1079bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1080bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor spread++; 1081bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1082bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return new String(chars, 0, 
pos - spread); 1083bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1084bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1085bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 10861d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a string containing all matching characters of a character sequence, in order. For 10871d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * example: <pre> {@code 1088bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1089bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * CharMatcher.is('a').retainFrom("bazaar")}</pre> 1090bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1091bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * ... returns {@code "aaa"}. 1092bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 10931d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert @CheckReturnValue 1094bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public String retainFrom(CharSequence sequence) { 1095bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return negate().removeFrom(sequence); 1096bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1097bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1098bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 10991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a string copy of the input character sequence, with each character that matches this 11001d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * matcher replaced by a given replacement character. For example: <pre> {@code 1101bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1102bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * CharMatcher.is('a').replaceFrom("radar", 'o')}</pre> 1103bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1104bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * ... returns {@code "rodor"}. 
1105bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 11061d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching 11071d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each 11081d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * character. 1109bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1110bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @param sequence the character sequence to replace matching characters in 11111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @param replacement the character to append to the result string in place of each matching 11121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * character in {@code sequence} 1113bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @return the new string 1114bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 11151d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert @CheckReturnValue 1116bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public String replaceFrom(CharSequence sequence, char replacement) { 1117bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor String string = sequence.toString(); 1118bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int pos = indexIn(string); 1119bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (pos == -1) { 1120bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return string; 1121bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1122bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor char[] chars = string.toCharArray(); 1123bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor chars[pos] = replacement; 1124bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor for (int i = pos + 1; i < chars.length; i++) { 1125bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (matches(chars[i])) { 1126bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor chars[i] = replacement; 
1127bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1128bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1129bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return new String(chars); 1130bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1131bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1132bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 11331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a string copy of the input character sequence, with each character that matches this 11341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * matcher replaced by a given replacement sequence. For example: <pre> {@code 1135bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1136bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre> 1137bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1138bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * ... returns {@code "yoohoo"}. 1139bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 11401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better 11411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * off calling {@link #replaceFrom(CharSequence, char)} directly. 
1142bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1143bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @param sequence the character sequence to replace matching characters in 11441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @param replacement the characters to append to the result string in place of each matching 11451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * character in {@code sequence} 1146bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @return the new string 1147bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 11481d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert @CheckReturnValue 1149bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public String replaceFrom(CharSequence sequence, CharSequence replacement) { 1150bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int replacementLen = replacement.length(); 1151bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (replacementLen == 0) { 1152bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return removeFrom(sequence); 1153bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1154bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (replacementLen == 1) { 1155bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return replaceFrom(sequence, replacement.charAt(0)); 1156bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1157bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1158bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor String string = sequence.toString(); 1159bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int pos = indexIn(string); 1160bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (pos == -1) { 1161bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return string; 1162bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1163bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1164bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int len = string.length(); 11651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert StringBuilder buf = new StringBuilder((len * 3 / 
2) + 16); 1166bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1167bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int oldpos = 0; 1168bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor do { 1169bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor buf.append(string, oldpos, pos); 1170bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor buf.append(replacement); 1171bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor oldpos = pos + 1; 1172bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor pos = indexIn(string, oldpos); 1173bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } while (pos != -1); 1174bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1175bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor buf.append(string, oldpos, len); 1176bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return buf.toString(); 1177bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1178bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1179bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 11801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a substring of the input character sequence that omits all characters this matcher 11811d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * matches from the beginning and from the end of the string. For example: <pre> {@code 1182bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1183bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre> 1184bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1185bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * ... returns {@code "cat"}. 
1186bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 11871d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>Note that: <pre> {@code 1188bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1189bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre> 1190bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1191bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * ... is equivalent to {@link String#trim()}. 1192bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 11931d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert @CheckReturnValue 1194bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public String trimFrom(CharSequence sequence) { 1195bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int len = sequence.length(); 1196bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int first; 1197bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int last; 1198bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1199bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor for (first = 0; first < len; first++) { 1200bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (!matches(sequence.charAt(first))) { 1201bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor break; 1202bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1203bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1204bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor for (last = len - 1; last > first; last--) { 1205bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (!matches(sequence.charAt(last))) { 1206bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor break; 1207bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1208bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1209bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1210bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return sequence.subSequence(first, last + 1).toString(); 1211bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1212bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 
1213bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 12141d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a substring of the input character sequence that omits all characters this matcher 12151d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * matches from the beginning of the string. For example: <pre> {@code 1216bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1217bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre> 1218bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1219bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * ... returns {@code "catbab"}. 1220bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 12211d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert @CheckReturnValue 1222bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public String trimLeadingFrom(CharSequence sequence) { 1223bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int len = sequence.length(); 12247dd252788645e940eada959bdde927426e2531c9Paul Duffin for (int first = 0; first < len; first++) { 1225bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (!matches(sequence.charAt(first))) { 12267dd252788645e940eada959bdde927426e2531c9Paul Duffin return sequence.subSequence(first, len).toString(); 1227bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1228bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 12297dd252788645e940eada959bdde927426e2531c9Paul Duffin return ""; 1230bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1231bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1232bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 12331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a substring of the input character sequence that omits all characters this matcher 12341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * matches from the end of the string. 
For example: <pre> {@code 1235bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1236bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre> 1237bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1238bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * ... returns {@code "abacat"}. 1239bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 12401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert @CheckReturnValue 1241bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public String trimTrailingFrom(CharSequence sequence) { 1242bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor int len = sequence.length(); 12437dd252788645e940eada959bdde927426e2531c9Paul Duffin for (int last = len - 1; last >= 0; last--) { 1244bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor if (!matches(sequence.charAt(last))) { 12457dd252788645e940eada959bdde927426e2531c9Paul Duffin return sequence.subSequence(0, last + 1).toString(); 1246bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1247bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 12487dd252788645e940eada959bdde927426e2531c9Paul Duffin return ""; 1249bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1250bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1251bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 12521d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Returns a string copy of the input character sequence, with each group of consecutive 12531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * characters that match this matcher replaced by a single replacement character. For example: 12541d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <pre> {@code 1255bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1256bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre> 1257bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 1258bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * ... 
returns {@code "b-p-r"}. 1259bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 12601d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching 12611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each 12621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * character. 1263bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * 12641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @param sequence the character sequence to replace matching groups of characters in 12651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * @param replacement the character to append to the result string in place of each group of 12661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * matching characters in {@code sequence} 1267bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor * @return the new string 1268bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 12691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert @CheckReturnValue 1270bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public String collapseFrom(CharSequence sequence, char replacement) { 12717dd252788645e940eada959bdde927426e2531c9Paul Duffin // This implementation avoids unnecessary allocation. 
12727dd252788645e940eada959bdde927426e2531c9Paul Duffin int len = sequence.length(); 12737dd252788645e940eada959bdde927426e2531c9Paul Duffin for (int i = 0; i < len; i++) { 1274bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor char c = sequence.charAt(i); 12757dd252788645e940eada959bdde927426e2531c9Paul Duffin if (matches(c)) { 12760888a09821a98ac0680fad765217302858e70fa4Paul Duffin if (c == replacement 12770888a09821a98ac0680fad765217302858e70fa4Paul Duffin && (i == len - 1 || !matches(sequence.charAt(i + 1)))) { 12787dd252788645e940eada959bdde927426e2531c9Paul Duffin // a no-op replacement 12797dd252788645e940eada959bdde927426e2531c9Paul Duffin i++; 12807dd252788645e940eada959bdde927426e2531c9Paul Duffin } else { 12810888a09821a98ac0680fad765217302858e70fa4Paul Duffin StringBuilder builder = new StringBuilder(len) 12820888a09821a98ac0680fad765217302858e70fa4Paul Duffin .append(sequence.subSequence(0, i)) 12830888a09821a98ac0680fad765217302858e70fa4Paul Duffin .append(replacement); 12847dd252788645e940eada959bdde927426e2531c9Paul Duffin return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true); 1285bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1286bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1287bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 12887dd252788645e940eada959bdde927426e2531c9Paul Duffin // no replacement needed 12897dd252788645e940eada959bdde927426e2531c9Paul Duffin return sequence.toString(); 1290bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1291bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1292bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 12931d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that 12941d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * groups of matching characters at the start or end of the sequence are removed without 12951d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * 
replacement. 1296bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 12971d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert @CheckReturnValue 1298bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor public String trimAndCollapseFrom(CharSequence sequence, char replacement) { 12997dd252788645e940eada959bdde927426e2531c9Paul Duffin // This implementation avoids unnecessary allocation. 13007dd252788645e940eada959bdde927426e2531c9Paul Duffin int len = sequence.length(); 13017dd252788645e940eada959bdde927426e2531c9Paul Duffin int first; 13027dd252788645e940eada959bdde927426e2531c9Paul Duffin int last; 13037dd252788645e940eada959bdde927426e2531c9Paul Duffin 13047dd252788645e940eada959bdde927426e2531c9Paul Duffin for (first = 0; first < len && matches(sequence.charAt(first)); first++) {} 13057dd252788645e940eada959bdde927426e2531c9Paul Duffin for (last = len - 1; last > first && matches(sequence.charAt(last)); last--) {} 13067dd252788645e940eada959bdde927426e2531c9Paul Duffin 13070888a09821a98ac0680fad765217302858e70fa4Paul Duffin return (first == 0 && last == len - 1) 13080888a09821a98ac0680fad765217302858e70fa4Paul Duffin ? 
collapseFrom(sequence, replacement) 13090888a09821a98ac0680fad765217302858e70fa4Paul Duffin : finishCollapseFrom( 13100888a09821a98ac0680fad765217302858e70fa4Paul Duffin sequence, first, last + 1, replacement, 13110888a09821a98ac0680fad765217302858e70fa4Paul Duffin new StringBuilder(last + 1 - first), 13120888a09821a98ac0680fad765217302858e70fa4Paul Duffin false); 13137dd252788645e940eada959bdde927426e2531c9Paul Duffin } 13147dd252788645e940eada959bdde927426e2531c9Paul Duffin 13150888a09821a98ac0680fad765217302858e70fa4Paul Duffin private String finishCollapseFrom( 13160888a09821a98ac0680fad765217302858e70fa4Paul Duffin CharSequence sequence, int start, int end, char replacement, 13177dd252788645e940eada959bdde927426e2531c9Paul Duffin StringBuilder builder, boolean inMatchingGroup) { 13187dd252788645e940eada959bdde927426e2531c9Paul Duffin for (int i = start; i < end; i++) { 1319bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor char c = sequence.charAt(i); 13207dd252788645e940eada959bdde927426e2531c9Paul Duffin if (matches(c)) { 13217dd252788645e940eada959bdde927426e2531c9Paul Duffin if (!inMatchingGroup) { 1322bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor builder.append(replacement); 13237dd252788645e940eada959bdde927426e2531c9Paul Duffin inMatchingGroup = true; 1324bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 13257dd252788645e940eada959bdde927426e2531c9Paul Duffin } else { 1326bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor builder.append(c); 13277dd252788645e940eada959bdde927426e2531c9Paul Duffin inMatchingGroup = false; 1328bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1329bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1330bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return builder.toString(); 1331bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 1332bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor 1333bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor /** 13340888a09821a98ac0680fad765217302858e70fa4Paul Duffin * @deprecated 
Provided only to satisfy the {@link Predicate} interface; use {@link #matches} 13350888a09821a98ac0680fad765217302858e70fa4Paul Duffin * instead. 1336bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor */ 13370888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Deprecated 13380888a09821a98ac0680fad765217302858e70fa4Paul Duffin @Override 13397dd252788645e940eada959bdde927426e2531c9Paul Duffin public boolean apply(Character character) { 1340bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor return matches(character); 1341bfe2dd089341dcb4c1fb65a5b6b077ad0ebbf6dcDan Egnor } 13427dd252788645e940eada959bdde927426e2531c9Paul Duffin 13437dd252788645e940eada959bdde927426e2531c9Paul Duffin /** 13447dd252788645e940eada959bdde927426e2531c9Paul Duffin * Returns a string representation of this {@code CharMatcher}, such as 13457dd252788645e940eada959bdde927426e2531c9Paul Duffin * {@code CharMatcher.or(WHITESPACE, JAVA_DIGIT)}. 13467dd252788645e940eada959bdde927426e2531c9Paul Duffin */ 13477dd252788645e940eada959bdde927426e2531c9Paul Duffin @Override 13487dd252788645e940eada959bdde927426e2531c9Paul Duffin public String toString() { 13497dd252788645e940eada959bdde927426e2531c9Paul Duffin return description; 13507dd252788645e940eada959bdde927426e2531c9Paul Duffin } 13517dd252788645e940eada959bdde927426e2531c9Paul Duffin 13520888a09821a98ac0680fad765217302858e70fa4Paul Duffin static final String WHITESPACE_TABLE = "" 13530888a09821a98ac0680fad765217302858e70fa4Paul Duffin + "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000" 13540888a09821a98ac0680fad765217302858e70fa4Paul Duffin + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680" 13550888a09821a98ac0680fad765217302858e70fa4Paul Duffin + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009" 13560888a09821a98ac0680fad765217302858e70fa4Paul Duffin + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000"; 13570888a09821a98ac0680fad765217302858e70fa4Paul Duffin static final int WHITESPACE_MULTIPLIER = 1682554634; 
// Right shift that reduces the multiplied character to an index within WHITESPACE_TABLE.
  static final int WHITESPACE_SHIFT = Integer.numberOfLeadingZeros(WHITESPACE_TABLE.length() - 1);

  /**
   * Matches any character that is whitespace according to the latest Unicode standard, as
   * illustrated
   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
   * This is not the same definition used by other Java APIs. (See a
   * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
   * definitions of "whitespace"</a>.)
   *
   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
   * to date.
   */
  public static final CharMatcher WHITESPACE = new FastMatcher("WHITESPACE") {
    @Override
    public boolean matches(char c) {
      // Multiply-and-shift maps the character to a table slot; it matches only if that slot
      // holds the very same character.
      int slot = (WHITESPACE_MULTIPLIER * c) >>> WHITESPACE_SHIFT;
      return WHITESPACE_TABLE.charAt(slot) == c;
    }

    @GwtIncompatible("java.util.BitSet")
    @Override
    void setBits(BitSet table) {
      // Mark every character stored in the lookup table.
      int index = 0;
      while (index < WHITESPACE_TABLE.length()) {
        table.set(WHITESPACE_TABLE.charAt(index));
        index++;
      }
    }
  };
}