18b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira/* 28b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Copyright (C) 2008 Google Inc. 38b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 48b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Licensed under the Apache License, Version 2.0 (the "License"); 58b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * you may not use this file except in compliance with the License. 68b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * You may obtain a copy of the License at 78b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 88b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * http://www.apache.org/licenses/LICENSE-2.0 98b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Unless required by applicable law or agreed to in writing, software 118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * distributed under the License is distributed on an "AS IS" BASIS, 128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * See the License for the specific language governing permissions and 148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * limitations under the License. 158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 1730e2c24b056542f3b1b438aeb798305d1226d0c8Andy Huangpackage com.android.mail.lib.base; 188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 1930e2c24b056542f3b1b438aeb798305d1226d0c8Andy Huangimport static com.android.mail.lib.base.Preconditions.checkArgument; 2030e2c24b056542f3b1b438aeb798305d1226d0c8Andy Huangimport static com.android.mail.lib.base.Preconditions.checkNotNull; 218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereiraimport java.util.ArrayList; 238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereiraimport java.util.Arrays; 248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereiraimport java.util.List; 258b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 268b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira/** 278b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines a true or false value for any Java {@code char} value, just as 288b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * {@link Predicate} does for any {@link Object}. Also offers basic text 298b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * processing methods based on this function. Implementations are strongly 308b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * encouraged to be side-effect-free and immutable. 318b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 328b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>Throughout the documentation of this class, the phrase "matching 338b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * character" is used to mean "any character {@code c} for which {@code 348b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * this.matches(c)} returns {@code true}". 358b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 368b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p><b>Note:</b> This class deals only with {@code char} values; it does not 378b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * understand supplementary Unicode code points in the range {@code 0x10000} to 388b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * {@code 0x10FFFF}. Such logical characters are encoded into a {@code String} 398b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * using surrogate pairs, and a {@code CharMatcher} treats these just as two 408b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * separate characters. 418b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 428b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @author Kevin Bourrillion 438b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @since 2009.09.15 <b>tentative</b> 448b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 458b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereirapublic abstract class CharMatcher implements Predicate<Character> { 468b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 478b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // Constants 488b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 498b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // Excludes 2000-2000a, which is handled as a range 508b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira private static final String BREAKING_WHITESPACE_CHARS = 518b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000"; 528b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 538b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // Excludes 2007, which is handled as a gap in a pair of ranges 548b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira private static final String NON_BREAKING_WHITESPACE_CHARS = 558b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira "\u00a0\u180e\u202f"; 568b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 578b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 588b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is whitespace according to the latest 598b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Unicode standard, as illustrated 608b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>. 618b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * This is not the same definition used by other Java APIs. See a comparison 628b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * of several definitions of "whitespace" at 638b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <a href="TODO">(TODO)</a>. 648b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 658b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p><b>Note:</b> as the Unicode definition evolves, we will modify this 668b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * constant to keep it up to date. 678b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 688b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher WHITESPACE = 698b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS) 708b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u2000', '\u200a')); 718b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 728b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 738b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is a breaking whitespace (that is, 748b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * a whitespace which can be interpreted as a break between words 758b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * for formatting purposes). See {@link #WHITESPACE} for a discussion 768b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * of that term. 778b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 788b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @since 2010.01.04 <b>tentative</b> 798b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 808b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher BREAKING_WHITESPACE = 818b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira anyOf(BREAKING_WHITESPACE_CHARS) 828b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u2000', '\u2006')) 838b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u2008', '\u200a')); 848b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 858b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 868b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is ASCII, meaning that its code point is 878b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * less than 128. 888b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 898b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher ASCII = inRange('\0', '\u007f'); 908b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 918b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 928b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is a digit according to 938b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. 948b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 958b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher DIGIT; 968b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 978b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira static { 988b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira CharMatcher digit = inRange('0', '9'); 998b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira String zeroes = 1008b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66" 1018b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946" 1028b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10"; 1038b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (char base : zeroes.toCharArray()) { 1048b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira digit = digit.or(inRange(base, (char) (base + 9))); 1058b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 1068b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira DIGIT = digit; 1078b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 1088b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 1098b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 1108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is whitespace according to {@link 1118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Character#isWhitespace(char) Java's definition}; it is usually preferable 1128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * to use {@link #WHITESPACE}. See a comparison of several definitions of 1138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * "whitespace" at <a href="http://go/white+space">go/white+space</a>. 1148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 1158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher JAVA_WHITESPACE 1168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira = inRange('\u0009', (char) 13) // \\u000d doesn't work as a char literal 1178b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u001c', '\u0020')) 1188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(is('\u1680')) 1198b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(is('\u180e')) 1208b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u2000', '\u2006')) 1218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u2008', '\u200b')) 1228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u2028', '\u2029')) 1238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(is('\u205f')) 1248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(is('\u3000')); 1258b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 1268b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 1278b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is a digit according to {@link 1288b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Character#isDigit(char) Java's definition}. If you only care to match 1298b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * ASCII digits, you can use {@code inRange('0', '9')}. 1308b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 1318b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher JAVA_DIGIT = new CharMatcher() { 1328b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 1338b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return Character.isDigit(c); 1348b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 1358b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 1368b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 1378b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 1388b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is a letter according to {@link 1398b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Character#isLetter(char) Java's definition}. If you only care to match 1408b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * letters of the Latin alphabet, you can use {@code 1418b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * inRange('a', 'z').or(inRange('A', 'Z'))}. 1428b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 1438b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher JAVA_LETTER = new CharMatcher() { 1448b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 1458b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return Character.isLetter(c); 1468b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 1478b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 1488b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 1498b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 1508b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is a letter or digit according to {@link 1518b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Character#isLetterOrDigit(char) Java's definition}. 1528b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 1538b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() { 1548b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 1558b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return Character.isLetterOrDigit(c); 1568b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 1578b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 1588b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 1598b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 1608b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is upper case according to {@link 1618b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Character#isUpperCase(char) Java's definition}. 1628b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 1638b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() { 1648b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 1658b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return Character.isUpperCase(c); 1668b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 1678b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 1688b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 1698b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 1708b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is lower case according to {@link 1718b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Character#isLowerCase(char) Java's definition}. 1728b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 1738b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() { 1748b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 1758b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return Character.isLowerCase(c); 1768b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 1778b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 1788b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 1798b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 1808b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is an ISO control character according to 1818b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * {@link Character#isISOControl(char)}. 1828b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 1838b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher JAVA_ISO_CONTROL = inRange('\u0000', '\u001f') 1848b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u007f', '\u009f')); 1858b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 1868b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 1878b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is invisible; that is, if its Unicode 1888b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * category is any of SPACE_SEPARATOR, LINE_SEPARATOR, 1898b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and PRIVATE_USE according 1908b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * to ICU4J. 1918b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 1928b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020') 1938b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u007f', '\u00a0')) 1948b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(is('\u00ad')) 1958b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u0600', '\u0603')) 1968b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e")) 1978b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u2000', '\u200f')) 1988b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u2028', '\u202f')) 1998b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u205f', '\u2064')) 2008b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u206a', '\u206f')) 2018b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(is('\u3000')) 2028b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\ud800', '\uf8ff')) 2038b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(anyOf("\ufeff\ufff9\ufffa\ufffb")); 2048b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 2058b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 2068b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is single-width (not double-width). When 2078b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * in doubt, this matcher errs on the side of returning {@code false} (that 2088b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * is, it tends to assume a character is double-width). 2098b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 2108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <b>Note:</b> as the reference file evolves, we will modify this constant 2118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * to keep it up to date. 2128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 2138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9') 2148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(is('\u05be')) 2158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u05d0', '\u05ea')) 2168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(is('\u05f3')) 2178b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(is('\u05f4')) 2188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u0600', '\u06ff')) 2198b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u0750', '\u077f')) 2208b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u0e00', '\u0e7f')) 2218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u1e00', '\u20af')) 2228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\u2100', '\u213a')) 2238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\ufb50', '\ufdff')) 2248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\ufe70', '\ufeff')) 2258b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .or(inRange('\uff61', '\uffdc')); 2268b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 2278b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 2288b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Determines whether a character is whitespace according to an arbitrary definition used by 2298b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * {@link StringUtil} for years. Most likely you don't want to use this. See a comparison of 2308b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * several definitions of "whitespace" at <a href="http://goto/white space">goto/white space</a>. 2318b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 2328b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p><b>To be deprecated.</b> use {@link #WHITESPACE} to switch to the Unicode definition, or 2338b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * create a matcher for the specific characters you want. Not deprecated yet because it is a 2348b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * stepping stone for getting off of many deprecated {@link StringUtil} methods. 2358b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 2368b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Deprecated 2378b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher LEGACY_WHITESPACE = 2388b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira anyOf(" \r\n\t\u3000\u00A0\u2007\u202F").precomputed(); 2398b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 2408b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 2418b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** Matches any character. */ 2428b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher ANY = new CharMatcher() { 2438b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 2448b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return true; 2458b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2468b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 2478b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public int indexIn(CharSequence sequence) { 2488b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return (sequence.length() == 0) ? -1 : 0; 2498b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2508b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public int indexIn(CharSequence sequence, int start) { 2518b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int length = sequence.length(); 2528b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira Preconditions.checkPositionIndex(start, length); 2538b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return (start == length) ? -1 : start; 2548b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2558b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public int lastIndexIn(CharSequence sequence) { 2568b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.length() - 1; 2578b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2588b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matchesAllOf(CharSequence sequence) { 2598b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkNotNull(sequence); 2608b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return true; 2618b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2628b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matchesNoneOf(CharSequence sequence) { 2638b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.length() == 0; 2648b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2658b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public String removeFrom(CharSequence sequence) { 2668b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkNotNull(sequence); 2678b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return ""; 2688b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2698b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public String replaceFrom( 2708b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira CharSequence sequence, char replacement) { 2718b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira char[] array = new char[sequence.length()]; 2728b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira Arrays.fill(array, replacement); 2738b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new String(array); 2748b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2758b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public String replaceFrom( 2768b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira CharSequence sequence, CharSequence replacement) { 2778b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira StringBuilder retval = new StringBuilder(sequence.length() * replacement.length()); 2788b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (int i = 0; i < sequence.length(); i++) { 2798b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira retval.append(replacement); 2808b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2818b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return retval.toString(); 2828b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2838b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public String collapseFrom(CharSequence sequence, char replacement) { 2848b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return (sequence.length() == 0) ? "" : String.valueOf(replacement); 2858b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2868b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public String trimFrom(CharSequence sequence) { 2878b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkNotNull(sequence); 2888b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return ""; 2898b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2908b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public int countIn(CharSequence sequence) { 2918b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.length(); 2928b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2938b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher and(CharMatcher other) { 2948b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return checkNotNull(other); 2958b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 2968b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher or(CharMatcher other) { 2978b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkNotNull(other); 2988b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return this; 2998b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3008b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher negate() { 3018b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return NONE; 3028b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3038b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher precomputed() { 3048b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return this; 3058b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3068b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 3078b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 3088b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** Matches no characters. */ 3098b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static final CharMatcher NONE = new CharMatcher() { 3108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 3118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return false; 3128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 3148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public int indexIn(CharSequence sequence) { 3158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkNotNull(sequence); 3168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return -1; 3178b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public int indexIn(CharSequence sequence, int start) { 3198b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int length = sequence.length(); 3208b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira Preconditions.checkPositionIndex(start, length); 3218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return -1; 3228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public int lastIndexIn(CharSequence sequence) { 3248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkNotNull(sequence); 3258b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return -1; 3268b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3278b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matchesAllOf(CharSequence sequence) { 3288b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.length() == 0; 3298b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3308b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matchesNoneOf(CharSequence sequence) { 3318b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkNotNull(sequence); 3328b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return true; 3338b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3348b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public String removeFrom(CharSequence sequence) { 3358b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.toString(); 3368b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3378b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public String replaceFrom( 3388b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira CharSequence sequence, char replacement) { 3398b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.toString(); 3408b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3418b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public String replaceFrom( 3428b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira CharSequence sequence, CharSequence replacement) { 3438b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkNotNull(replacement); 3448b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.toString(); 3458b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3468b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public String collapseFrom( 3478b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira CharSequence sequence, char replacement) { 3488b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.toString(); 3498b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3508b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public String trimFrom(CharSequence sequence) { 3518b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.toString(); 3528b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3538b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public int countIn(CharSequence sequence) { 3548b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkNotNull(sequence); 3558b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return 0; 3568b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3578b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher and(CharMatcher other) { 3588b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkNotNull(other); 3598b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return this; 3608b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3618b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher or(CharMatcher other) { 3628b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return checkNotNull(other); 3638b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3648b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher negate() { 3658b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return ANY; 3668b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3678b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override protected void setBits(LookupTable table) { 3688b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3698b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher precomputed() { 3708b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return this; 3718b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3728b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 3738b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 3748b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // Static factories 3758b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 3768b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 3778b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a {@code char} matcher that matches only one specified character. 3788b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 3798b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static CharMatcher is(final char match) { 3808b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new CharMatcher() { 3818b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 3828b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return c == match; 3838b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3848b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 3858b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public String replaceFrom( 3868b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira CharSequence sequence, char replacement) { 3878b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.toString().replace(match, replacement); 3888b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3898b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher and(CharMatcher other) { 3908b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return other.matches(match) ? this : NONE; 3918b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3928b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher or(CharMatcher other) { 3938b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return other.matches(match) ? other : super.or(other); 3948b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3958b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher negate() { 3968b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return isNot(match); 3978b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 3988b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override protected void setBits(LookupTable table) { 3998b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira table.set(match); 4008b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4018b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher precomputed() { 4028b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return this; 4038b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4048b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 4058b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4068b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 4078b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 4088b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a {@code char} matcher that matches any character except the one 4098b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * specified. 4108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 4118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>To negate another {@code CharMatcher}, use {@link #negate()}. 4128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 4138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static CharMatcher isNot(final char match) { 4148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new CharMatcher() { 4158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 4168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return c != match; 4178b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 4198b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher and(CharMatcher other) { 4208b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return other.matches(match) ? super.and(other) : other; 4218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher or(CharMatcher other) { 4238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return other.matches(match) ? ANY : this; 4248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4258b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher negate() { 4268b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return is(match); 4278b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4288b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 4298b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4308b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 4318b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 4328b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a {@code char} matcher that matches any character present in the 4338b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * given character sequence. 4348b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 4358b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static CharMatcher anyOf(final CharSequence sequence) { 4368b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira switch (sequence.length()) { 4378b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira case 0: 4388b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return NONE; 4398b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira case 1: 4408b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return is(sequence.charAt(0)); 4418b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira case 2: 4428b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira final char match1 = sequence.charAt(0); 4438b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira final char match2 = sequence.charAt(1); 4448b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new CharMatcher() { 4458b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 4468b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return c == match1 || c == match2; 4478b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4488b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override protected void setBits(LookupTable table) { 4498b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira table.set(match1); 4508b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira table.set(match2); 4518b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4528b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher precomputed() { 4538b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return this; 4548b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4558b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 4568b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4578b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 4588b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira final char[] chars = sequence.toString().toCharArray(); 4598b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira Arrays.sort(chars); // not worth collapsing duplicates 4608b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 4618b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new CharMatcher() { 4628b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 4638b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return Arrays.binarySearch(chars, c) >= 0; 4648b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4658b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override protected void setBits(LookupTable table) { 4668b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (char c : chars) { 4678b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira table.set(c); 4688b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4698b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4708b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 4718b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4728b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 4738b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 4748b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a {@code char} matcher that matches any character not present in 4758b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * the given character sequence. 4768b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 4778b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static CharMatcher noneOf(CharSequence sequence) { 4788b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return anyOf(sequence).negate(); 4798b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4808b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 4818b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 4828b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a {@code char} matcher that matches any character in a given range 4838b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * (both endpoints are inclusive). For example, to match any lowercase letter 4848b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * of the English alphabet, use {@code CharMatcher.inRange('a', 'z')}. 4858b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 4868b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @throws IllegalArgumentException if {@code endInclusive < startInclusive} 4878b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 4888b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static CharMatcher inRange( 4898b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira final char startInclusive, final char endInclusive) { 4908b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkArgument(endInclusive >= startInclusive); 4918b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new CharMatcher() { 4928b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 4938b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return startInclusive <= c && c <= endInclusive; 4948b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 4958b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override protected void setBits(LookupTable table) { 4968b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira char c = startInclusive; 4978b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira while (true) { 4988b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira table.set(c); 4998b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (c++ == endInclusive) { 5008b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira break; 5018b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5028b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5038b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5048b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher precomputed() { 5058b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return this; 5068b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5078b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 5088b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5098b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 5118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a matcher with identical behavior to the given {@link 5128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Character}-based predicate, but which operates on primitive {@code char} 5138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * instances instead. 5148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 5158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public static CharMatcher forPredicate( 5168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira final Predicate<? super Character> predicate) { 5178b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira checkNotNull(predicate); 5188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (predicate instanceof CharMatcher) { 5198b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return (CharMatcher) predicate; 5208b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new CharMatcher() { 5228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 5238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return predicate.apply(c); 5248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5258b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean apply(Character character) { 5268b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return predicate.apply(checkNotNull(character)); 5278b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5288b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 5298b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5308b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5318b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // Abstract methods 5328b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5338b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** Determines a true or false value for the given character. */ 5348b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public abstract boolean matches(char c); 5358b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5368b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // Non-static factories 5378b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5388b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 5398b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a matcher that matches any character not matched by this matcher. 5408b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 5418b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public CharMatcher negate() { 5428b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira final CharMatcher original = this; 5438b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new CharMatcher() { 5448b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 5458b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return !original.matches(c); 5468b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5478b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5488b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matchesAllOf(CharSequence sequence) { 5498b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return original.matchesNoneOf(sequence); 5508b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5518b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matchesNoneOf(CharSequence sequence) { 5528b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return original.matchesAllOf(sequence); 5538b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5548b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public int countIn(CharSequence sequence) { 5558b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.length() - original.countIn(sequence); 5568b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5578b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher negate() { 5588b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return original; 5598b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5608b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 5618b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5628b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5638b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 5648b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a matcher that matches any character matched by both this matcher 5658b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * and {@code other}. 5668b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 5678b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public CharMatcher and(CharMatcher other) { 5688b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new And(Arrays.asList(this, checkNotNull(other))); 5698b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5708b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5718b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira private static class And extends CharMatcher { 5728b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira List<CharMatcher> components; 5738b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5748b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira And(List<CharMatcher> components) { 5758b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira this.components = components; // Skip defensive copy (private) 5768b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5778b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5788b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 5798b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (CharMatcher matcher : components) { 5808b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (!matcher.matches(c)) { 5818b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return false; 5828b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5838b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5848b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return true; 5858b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5868b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5878b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher and(CharMatcher other) { 5888b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components); 5898b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira newComponents.add(checkNotNull(other)); 5908b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new And(newComponents); 5918b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5928b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 5938b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 5948b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 5958b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a matcher that matches any character matched by either this matcher 5968b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * or {@code other}. 5978b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 5988b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public CharMatcher or(CharMatcher other) { 5998b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new Or(Arrays.asList(this, checkNotNull(other))); 6008b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6018b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6028b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira private static class Or extends CharMatcher { 6038b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira List<CharMatcher> components; 6048b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6058b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira Or(List<CharMatcher> components) { 6068b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira this.components = components; // Skip defensive copy (private) 6078b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6088b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6098b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 6108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (CharMatcher matcher : components) { 6118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (matcher.matches(c)) { 6128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return true; 6138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return false; 6168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6178b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher or(CharMatcher other) { 6198b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components); 6208b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira newComponents.add(checkNotNull(other)); 6218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new Or(newComponents); 6228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override protected void setBits(LookupTable table) { 6258b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (CharMatcher matcher : components) { 6268b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira matcher.setBits(table); 6278b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6288b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6298b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6308b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6318b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 6328b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a {@code char} matcher functionally equivalent to this one, but 6338b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * which may be faster to query than the original; your mileage may vary. 6348b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Precomputation takes time and is likely to be worthwhile only if the 6358b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * precomputed matcher is queried many thousands of times. 6368b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 6378b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>This method has no effect (returns {@code this}) when called in GWT: 6388b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * it's unclear whether a precomputed matcher is faster, but it certainly 6398b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * consumes more memory, which doesn't seem like a worthwhile tradeoff in a 6408b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * browser. 6418b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 6428b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public CharMatcher precomputed() { 6438b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return Platform.precomputeCharMatcher(this); 6448b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6458b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6468b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 6478b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * This is the actual implementation of {@link #precomputed}, but we bounce 6488b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * calls through a method on {@link Platform} so that we can have different 6498b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * behavior in GWT. 6508b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 6518b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>The default precomputation is to cache the configuration of the original 6528b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * matcher in an eight-kilobyte bit array. In some situations this produces a 6538b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * matcher which is faster to query than the original. 6548b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 6558b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>The default implementation creates a new bit array and passes it to 6568b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * {@link #setBits(LookupTable)}. 6578b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 6588b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira CharMatcher precomputedInternal() { 6598b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira final LookupTable table = new LookupTable(); 6608b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira setBits(table); 6618b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6628b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new CharMatcher() { 6638b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public boolean matches(char c) { 6648b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return table.get(c); 6658b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6668b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6678b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // TODO: make methods like negate() smart 6688b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6698b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira @Override public CharMatcher precomputed() { 6708b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return this; 6718b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6728b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira }; 6738b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6748b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6758b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 6768b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * For use by implementors; sets the bit corresponding to each character ('\0' 6778b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * to '{@literal \}uFFFF') that matches this matcher in the given bit array, 6788b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * leaving all other bits untouched. 6798b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 6808b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>The default implementation loops over every possible character value, 6818b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * invoking {@link #matches} for each one. 6828b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 6838b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira protected void setBits(LookupTable table) { 6848b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira char c = Character.MIN_VALUE; 6858b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira while (true) { 6868b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (matches(c)) { 6878b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira table.set(c); 6888b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6898b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (c++ == Character.MAX_VALUE) { 6908b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira break; 6918b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6928b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6938b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 6948b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 6958b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 6968b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * A bit array with one bit per {@code char} value, used by {@link 6978b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * CharMatcher#precomputed}. 6988b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 6998b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>TODO: possibly share a common BitArray class with BloomFilter 7008b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * and others... a simpler java.util.BitSet. 7018b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 7028b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira protected static class LookupTable { 7038b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int[] data = new int[2048]; 7048b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 7058b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira void set(char index) { 7068b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira data[index >> 5] |= (1 << index); 7078b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7088b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira boolean get(char index) { 7098b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return (data[index >> 5] & (1 << index)) != 0; 7108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 7138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // Text processing routines 7148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 7158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 7168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns {@code true} if a character sequence contains only matching 7178b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * characters. 7188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 7198b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>The default implementation iterates over the sequence, invoking {@link 7208b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * #matches} for each character, until this returns {@code false} or the end 7218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * is reached. 7228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 7238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param sequence the character sequence to examine, possibly empty 7248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @return {@code true} if this matcher matches every character in the 7258b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * sequence, including when the sequence is empty 7268b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 7278b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public boolean matchesAllOf(CharSequence sequence) { 7288b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (int i = sequence.length() - 1; i >= 0; i--) { 7298b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (!matches(sequence.charAt(i))) { 7308b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return false; 7318b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7328b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7338b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return true; 7348b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7358b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 7368b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 7378b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns {@code true} if a character sequence contains no matching 7388b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * characters. 7398b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 7408b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>The default implementation iterates over the sequence, invoking {@link 7418b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * #matches} for each character, until this returns {@code false} or the end is 7428b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * reached. 7438b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 7448b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param sequence the character sequence to examine, possibly empty 7458b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @return {@code true} if this matcher matches every character in the 7468b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * sequence, including when the sequence is empty 7478b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 7488b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public boolean matchesNoneOf(CharSequence sequence) { 7498b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return indexIn(sequence) == -1; 7508b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7518b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 7528b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // TODO: perhaps add matchesAnyOf() 7538b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 7548b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 7558b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns the index of the first matching character in a character sequence, 7568b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * or {@code -1} if no matching character is present. 7578b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 7588b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>The default implementation iterates over the sequence in forward order 7598b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * calling {@link #matches} for each character. 7608b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 7618b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param sequence the character sequence to examine from the beginning 7628b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @return an index, or {@code -1} if no character matches 7638b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 7648b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public int indexIn(CharSequence sequence) { 7658b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int length = sequence.length(); 7668b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (int i = 0; i < length; i++) { 7678b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (matches(sequence.charAt(i))) { 7688b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return i; 7698b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7708b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7718b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return -1; 7728b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7738b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 7748b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 7758b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns the index of the first matching character in a character sequence, 7768b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * starting from a given position, or {@code -1} if no character matches after 7778b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * that position. 7788b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 7798b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>The default implementation iterates over the sequence in forward order, 7808b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * beginning at {@code start}, calling {@link #matches} for each character. 7818b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 7828b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param sequence the character sequence to examine 7838b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param start the first index to examine; must be nonnegative and no 7848b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * greater than {@code sequence.length()} 7858b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @return the index of the first matching character, guaranteed to be no less 7868b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * than {@code start}, or {@code -1} if no character matches 7878b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @throws IndexOutOfBoundsException if start is negative or greater than 7888b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * {@code sequence.length()} 7898b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 7908b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public int indexIn(CharSequence sequence, int start) { 7918b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int length = sequence.length(); 7928b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira Preconditions.checkPositionIndex(start, length); 7938b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (int i = start; i < length; i++) { 7948b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (matches(sequence.charAt(i))) { 7958b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return i; 7968b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7978b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 7988b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return -1; 7998b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8008b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 8018b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 8028b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns the index of the last matching character in a character sequence, 8038b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * or {@code -1} if no matching character is present. 8048b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 8058b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>The default implementation iterates over the sequence in reverse order 8068b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * calling {@link #matches} for each character. 8078b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 8088b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param sequence the character sequence to examine from the end 8098b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @return an index, or {@code -1} if no character matches 8108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 8118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public int lastIndexIn(CharSequence sequence) { 8128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (int i = sequence.length() - 1; i >= 0; i--) { 8138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (matches(sequence.charAt(i))) { 8148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return i; 8158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8178b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return -1; 8188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8198b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 8208b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 8218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns the number of matching characters found in a character sequence. 8228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 8238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public int countIn(CharSequence sequence) { 8248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int count = 0; 8258b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (int i = 0; i < sequence.length(); i++) { 8268b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (matches(sequence.charAt(i))) { 8278b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira count++; 8288b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8298b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8308b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return count; 8318b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8328b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 8338b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 8348b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a string containing all non-matching characters of a character 8358b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * sequence, in order. For example: <pre> {@code 8368b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 8378b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * CharMatcher.is('a').removeFrom("bazaar")}</pre> 8388b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 8398b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * ... returns {@code "bzr"}. 8408b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 8418b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public String removeFrom(CharSequence sequence) { 8428b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira String string = sequence.toString(); 8438b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int pos = indexIn(string); 8448b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (pos == -1) { 8458b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return string; 8468b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8478b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 8488b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira char[] chars = string.toCharArray(); 8498b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int spread = 1; 8508b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 8518b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // This unusual loop comes from extensive benchmarking 8528b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira OUT: 8538b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira while (true) { 8548b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira pos++; 8558b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira while (true) { 8568b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (pos == chars.length) { 8578b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira break OUT; 8588b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8598b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (matches(chars[pos])) { 8608b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira break; 8618b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8628b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira chars[pos - spread] = chars[pos]; 8638b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira pos++; 8648b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8658b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira spread++; 8668b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8678b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new String(chars, 0, pos - spread); 8688b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8698b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 8708b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 8718b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a string containing all matching characters of a character 8728b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * sequence, in order. For example: <pre> {@code 8738b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 8748b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * CharMatcher.is('a').retainFrom("bazaar")}</pre> 8758b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 8768b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * ... returns {@code "aaa"}. 8778b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 8788b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public String retainFrom(CharSequence sequence) { 8798b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return negate().removeFrom(sequence); 8808b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 8818b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 8828b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 8838b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a string copy of the input character sequence, with each character 8848b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * that matches this matcher replaced by a given replacement character. For 8858b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * example: <pre> {@code 8868b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 8878b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * CharMatcher.is('a').replaceFrom("radar", 'o')}</pre> 8888b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 8898b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * ... returns {@code "rodor"}. 8908b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 8918b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>The default implementation uses {@link #indexIn(CharSequence)} to find 8928b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * the first matching character, then iterates the remainder of the sequence 8938b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * calling {@link #matches(char)} for each character. 8948b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 8958b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param sequence the character sequence to replace matching characters in 8968b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param replacement the character to append to the result string in place of 8978b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * each matching character in {@code sequence} 8988b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @return the new string 8998b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 9008b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public String replaceFrom(CharSequence sequence, char replacement) { 9018b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira String string = sequence.toString(); 9028b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int pos = indexIn(string); 9038b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (pos == -1) { 9048b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return string; 9058b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9068b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira char[] chars = string.toCharArray(); 9078b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira chars[pos] = replacement; 9088b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (int i = pos + 1; i < chars.length; i++) { 9098b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (matches(chars[i])) { 9108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira chars[i] = replacement; 9118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return new String(chars); 9148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 9168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 9178b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a string copy of the input character sequence, with each character 9188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * that matches this matcher replaced by a given replacement sequence. For 9198b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * example: <pre> {@code 9208b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 9218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre> 9228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 9238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * ... returns {@code "yoohoo"}. 9248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 9258b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p><b>Note:</b> If the replacement is a fixed string with only one character, 9268b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * you are better off calling {@link #replaceFrom(CharSequence, char)} directly. 9278b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 9288b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param sequence the character sequence to replace matching characters in 9298b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param replacement the characters to append to the result string in place 9308b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * of each matching character in {@code sequence} 9318b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @return the new string 9328b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 9338b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public String replaceFrom(CharSequence sequence, CharSequence replacement) { 9348b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int replacementLen = replacement.length(); 9358b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (replacementLen == 0) { 9368b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return removeFrom(sequence); 9378b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9388b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (replacementLen == 1) { 9398b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return replaceFrom(sequence, replacement.charAt(0)); 9408b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9418b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 9428b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira String string = sequence.toString(); 9438b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int pos = indexIn(string); 9448b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (pos == -1) { 9458b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return string; 9468b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9478b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 9488b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int len = string.length(); 9498b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira StringBuilder buf = new StringBuilder((int) (len * 1.5) + 16); 9508b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 9518b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int oldpos = 0; 9528b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira do { 9538b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira buf.append(string, oldpos, pos); 9548b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira buf.append(replacement); 9558b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira oldpos = pos + 1; 9568b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira pos = indexIn(string, oldpos); 9578b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } while (pos != -1); 9588b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 9598b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira buf.append(string, oldpos, len); 9608b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return buf.toString(); 9618b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9628b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 9638b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 9648b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a substring of the input character sequence that omits all 9658b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * characters this matcher matches from the beginning and from the end of the 9668b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * string. For example: <pre> {@code 9678b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 9688b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre> 9698b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 9708b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * ... returns {@code "cat"}. 9718b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 9728b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>Note that<pre> {@code 9738b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 9748b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre> 9758b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 9768b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * ... is equivalent to {@link String#trim()}. 9778b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 9788b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public String trimFrom(CharSequence sequence) { 9798b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int len = sequence.length(); 9808b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int first; 9818b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int last; 9828b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 9838b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (first = 0; first < len; first++) { 9848b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (!matches(sequence.charAt(first))) { 9858b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira break; 9868b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9878b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9888b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (last = len - 1; last > first; last--) { 9898b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (!matches(sequence.charAt(last))) { 9908b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira break; 9918b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9928b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9938b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 9948b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.subSequence(first, last + 1).toString(); 9958b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 9968b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 9978b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 9988b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a substring of the input character sequence that omits all 9998b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * characters this matcher matches from the beginning of the 10008b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * string. For example: <pre> {@code 10018b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 10028b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre> 10038b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 10048b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * ... returns {@code "catbab"}. 10058b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 10068b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public String trimLeadingFrom(CharSequence sequence) { 10078b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int len = sequence.length(); 10088b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int first; 10098b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 10108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (first = 0; first < len; first++) { 10118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (!matches(sequence.charAt(first))) { 10128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira break; 10138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 10168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.subSequence(first, len).toString(); 10178b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 10198b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 10208b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a substring of the input character sequence that omits all 10218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * characters this matcher matches from the end of the 10228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * string. For example: <pre> {@code 10238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 10248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre> 10258b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 10268b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * ... returns {@code "abacat"}. 10278b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 10288b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public String trimTrailingFrom(CharSequence sequence) { 10298b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int len = sequence.length(); 10308b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int last; 10318b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 10328b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (last = len - 1; last >= 0; last--) { 10338b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (!matches(sequence.charAt(last))) { 10348b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira break; 10358b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10368b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10378b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 10388b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.subSequence(0, last + 1).toString(); 10398b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10408b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 10418b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 10428b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns a string copy of the input character sequence, with each group of 10438b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * consecutive characters that match this matcher replaced by a single 10448b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * replacement character. For example: <pre> {@code 10458b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 10468b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre> 10478b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 10488b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * ... returns {@code "b-p-r"}. 10498b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 10508b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * <p>The default implementation uses {@link #indexIn(CharSequence)} to find 10518b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * the first matching character, then iterates the remainder of the sequence 10528b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * calling {@link #matches(char)} for each character. 10538b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 10548b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param sequence the character sequence to replace matching groups of 10558b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * characters in 10568b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @param replacement the character to append to the result string in place of 10578b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * each group of matching characters in {@code sequence} 10588b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @return the new string 10598b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 10608b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public String collapseFrom(CharSequence sequence, char replacement) { 10618b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int first = indexIn(sequence); 10628b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (first == -1) { 10638b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return sequence.toString(); 10648b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10658b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 10668b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // TODO: this implementation can probably be made faster. 10678b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 10688b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira StringBuilder builder = new StringBuilder(sequence.length()) 10698b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .append(sequence.subSequence(0, first)) 10708b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira .append(replacement); 10718b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira boolean in = true; 10728b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (int i = first + 1; i < sequence.length(); i++) { 10738b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira char c = sequence.charAt(i); 10748b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (apply(c)) { 10758b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (!in) { 10768b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira builder.append(replacement); 10778b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira in = true; 10788b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10798b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } else { 10808b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira builder.append(c); 10818b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira in = false; 10828b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10838b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10848b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return builder.toString(); 10858b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10868b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 10878b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 10888b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Collapses groups of matching characters exactly as {@link #collapseFrom} 10898b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * does, except that groups of matching characters at the start or end of the 10908b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * sequence are removed without replacement. 10918b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 10928b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira public String trimAndCollapseFrom(CharSequence sequence, char replacement) { 10938b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira int first = negate().indexIn(sequence); 10948b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (first == -1) { 10958b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return ""; // everything matches. nothing's left. 10968b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 10978b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira StringBuilder builder = new StringBuilder(sequence.length()); 10988b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira boolean inMatchingGroup = false; 10998b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira for (int i = first; i < sequence.length(); i++) { 11008b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira char c = sequence.charAt(i); 11018b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (apply(c)) { 11028b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira inMatchingGroup = true; 11038b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } else { 11048b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira if (inMatchingGroup) { 11058b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira builder.append(replacement); 11068b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira inMatchingGroup = false; 11078b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 11088b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira builder.append(c); 11098b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 11108b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 11118b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return builder.toString(); 11128b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 11138b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 11148b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira // Predicate interface 11158b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira 11168b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /** 11178b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * Returns {@code true} if this matcher matches the given character. 11188b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * 11198b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira * @throws NullPointerException if {@code character} is null 11208b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira */ 11218b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira /*@Override*/ public boolean apply(Character character) { 11228b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira return matches(character); 11238b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira } 11248b99ba451db6973978e60f91da2199686a9c85e7Mindy Pereira}