StringUtils.java revision eac8670830a172dd2ba2fea0632ecc7f7ec95992
11d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes/* 21d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * Copyright (C) 2012 The Android Open Source Project 31d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * 41d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * Licensed under the Apache License, Version 2.0 (the "License"); 51d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * you may not use this file except in compliance with the License. 61d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * You may obtain a copy of the License at 71d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * 81d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * http://www.apache.org/licenses/LICENSE-2.0 91d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * 101d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * Unless required by applicable law or agreed to in writing, software 111d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * distributed under the License is distributed on an "AS IS" BASIS, 121d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 131d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * See the License for the specific language governing permissions and 141d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes * limitations under the License. 151d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes */ 161d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes 171d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughespackage com.android.inputmethod.latin.utils; 181d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes 191d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughesimport static com.android.inputmethod.latin.Constants.CODE_UNSPECIFIED; 201d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes 211d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughesimport android.text.Spanned; 221d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughesimport android.text.TextUtils; 231d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes 241d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughesimport com.android.inputmethod.annotations.UsedForTesting; 251d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughesimport com.android.inputmethod.latin.Constants; 261d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes 271d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughesimport java.util.ArrayList; 281d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughesimport java.util.Arrays; 291d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughesimport java.util.Locale; 301d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughesimport java.util.regex.Matcher; 311d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughesimport java.util.regex.Pattern; 321d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes 331d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughespublic final class StringUtils { 341d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case 351d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes public static final int CAPITALIZE_FIRST = 1; // First only 361d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes public static final int CAPITALIZE_ALL = 2; // All caps 371d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes 381d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes private static final String EMPTY_STRING = ""; 391d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes 401d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes private StringUtils() { 411d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes // This utility class is not publicly instantiable. 421d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes } 431d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes 441d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes public static int codePointCount(final String text) { 451d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes if (TextUtils.isEmpty(text)) return 0; 461d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes return text.codePointCount(0, text.length()); 471d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes } 481d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes 491d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes public static String newSingleCodePointString(int codePoint) { 501d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes if (Character.charCount(codePoint) == 1) { 511d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes // Optimization: avoid creating a temporary array for characters that are 521d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes // represented by a single char value 531d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes return String.valueOf((char) codePoint); 541d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes } 551d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes // For surrogate pair 561d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes return new String(Character.toChars(codePoint)); 571d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes } 581d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes 591d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes public static boolean containsInArray(final String text, final String[] array) { 601d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes for (final String element : array) { 611d18e9cc0ab8b2f6d860c4f7d85c33eb5cde4179Elliott Hughes if (text.equals(element)) return true; 62 } 63 return false; 64 } 65 66 /** 67 * Comma-Splittable Text is similar to Comma-Separated Values (CSV) but has much simpler syntax. 68 * Unlike CSV, Comma-Splittable Text has no escaping mechanism, so that the text can't contain 69 * a comma character in it. 70 */ 71 private static final String SEPARATOR_FOR_COMMA_SPLITTABLE_TEXT = ","; 72 73 public static boolean containsInCommaSplittableText(final String text, 74 final String extraValues) { 75 if (TextUtils.isEmpty(extraValues)) { 76 return false; 77 } 78 return containsInArray(text, extraValues.split(SEPARATOR_FOR_COMMA_SPLITTABLE_TEXT)); 79 } 80 81 public static String removeFromCommaSplittableTextIfExists(final String text, 82 final String extraValues) { 83 if (TextUtils.isEmpty(extraValues)) { 84 return EMPTY_STRING; 85 } 86 final String[] elements = extraValues.split(SEPARATOR_FOR_COMMA_SPLITTABLE_TEXT); 87 if (!containsInArray(text, elements)) { 88 return extraValues; 89 } 90 final ArrayList<String> result = new ArrayList<>(elements.length - 1); 91 for (final String element : elements) { 92 if (!text.equals(element)) { 93 result.add(element); 94 } 95 } 96 return TextUtils.join(SEPARATOR_FOR_COMMA_SPLITTABLE_TEXT, result); 97 } 98 99 /** 100 * Remove duplicates from an array of strings. 101 * 102 * This method will always keep the first occurrence of all strings at their position 103 * in the array, removing the subsequent ones. 104 */ 105 public static void removeDupes(final ArrayList<String> suggestions) { 106 if (suggestions.size() < 2) return; 107 int i = 1; 108 // Don't cache suggestions.size(), since we may be removing items 109 while (i < suggestions.size()) { 110 final String cur = suggestions.get(i); 111 // Compare each suggestion with each previous suggestion 112 for (int j = 0; j < i; j++) { 113 final String previous = suggestions.get(j); 114 if (TextUtils.equals(cur, previous)) { 115 suggestions.remove(i); 116 i--; 117 break; 118 } 119 } 120 i++; 121 } 122 } 123 124 public static String capitalizeFirstCodePoint(final String s, final Locale locale) { 125 if (s.length() <= 1) { 126 return s.toUpperCase(locale); 127 } 128 // Please refer to the comment below in 129 // {@link #capitalizeFirstAndDowncaseRest(String,Locale)} as this has the same shortcomings 130 final int cutoff = s.offsetByCodePoints(0, 1); 131 return s.substring(0, cutoff).toUpperCase(locale) + s.substring(cutoff); 132 } 133 134 public static String capitalizeFirstAndDowncaseRest(final String s, final Locale locale) { 135 if (s.length() <= 1) { 136 return s.toUpperCase(locale); 137 } 138 // TODO: fix the bugs below 139 // - This does not work for Greek, because it returns upper case instead of title case. 140 // - It does not work for Serbian, because it fails to account for the "lj" character, 141 // which should be "Lj" in title case and "LJ" in upper case. 142 // - It does not work for Dutch, because it fails to account for the "ij" digraph when it's 143 // written as two separate code points. They are two different characters but both should 144 // be capitalized as "IJ" as if they were a single letter in most words (not all). If the 145 // unicode char for the ligature is used however, it works. 146 final int cutoff = s.offsetByCodePoints(0, 1); 147 return s.substring(0, cutoff).toUpperCase(locale) + s.substring(cutoff).toLowerCase(locale); 148 } 149 150 private static final int[] EMPTY_CODEPOINTS = {}; 151 152 public static int[] toCodePointArray(final CharSequence charSequence) { 153 return toCodePointArray(charSequence, 0, charSequence.length()); 154 } 155 156 /** 157 * Converts a range of a string to an array of code points. 158 * @param charSequence the source string. 159 * @param startIndex the start index inside the string in java chars, inclusive. 160 * @param endIndex the end index inside the string in java chars, exclusive. 161 * @return a new array of code points. At most endIndex - startIndex, but possibly less. 162 */ 163 public static int[] toCodePointArray(final CharSequence charSequence, 164 final int startIndex, final int endIndex) { 165 final int length = charSequence.length(); 166 if (length <= 0) { 167 return EMPTY_CODEPOINTS; 168 } 169 final int[] codePoints = 170 new int[Character.codePointCount(charSequence, startIndex, endIndex)]; 171 copyCodePointsAndReturnCodePointCount(codePoints, charSequence, startIndex, endIndex, 172 false /* downCase */); 173 return codePoints; 174 } 175 176 /** 177 * Copies the codepoints in a CharSequence to an int array. 178 * 179 * This method assumes there is enough space in the array to store the code points. The size 180 * can be measured with Character#codePointCount(CharSequence, int, int) before passing to this 181 * method. If the int array is too small, an ArrayIndexOutOfBoundsException will be thrown. 182 * Also, this method makes no effort to be thread-safe. Do not modify the CharSequence while 183 * this method is running, or the behavior is undefined. 184 * This method can optionally downcase code points before copying them, but it pays no attention 185 * to locale while doing so. 186 * 187 * @param destination the int array. 188 * @param charSequence the CharSequence. 189 * @param startIndex the start index inside the string in java chars, inclusive. 190 * @param endIndex the end index inside the string in java chars, exclusive. 191 * @param downCase if this is true, code points will be downcased before being copied. 192 * @return the number of copied code points. 193 */ 194 public static int copyCodePointsAndReturnCodePointCount(final int[] destination, 195 final CharSequence charSequence, final int startIndex, final int endIndex, 196 final boolean downCase) { 197 int destIndex = 0; 198 for (int index = startIndex; index < endIndex; 199 index = Character.offsetByCodePoints(charSequence, index, 1)) { 200 final int codePoint = Character.codePointAt(charSequence, index); 201 // TODO: stop using this, as it's not aware of the locale and does not always do 202 // the right thing. 203 destination[destIndex] = downCase ? Character.toLowerCase(codePoint) : codePoint; 204 destIndex++; 205 } 206 return destIndex; 207 } 208 209 public static int[] toSortedCodePointArray(final String string) { 210 final int[] codePoints = toCodePointArray(string); 211 Arrays.sort(codePoints); 212 return codePoints; 213 } 214 215 /** 216 * Construct a String from a code point array 217 * 218 * @param codePoints a code point array that is null terminated when its logical length is 219 * shorter than the array length. 220 * @return a string constructed from the code point array. 221 */ 222 public static String getStringFromNullTerminatedCodePointArray(final int[] codePoints) { 223 int stringLength = codePoints.length; 224 for (int i = 0; i < codePoints.length; i++) { 225 if (codePoints[i] == 0) { 226 stringLength = i; 227 break; 228 } 229 } 230 return new String(codePoints, 0 /* offset */, stringLength); 231 } 232 233 // This method assumes the text is not null. For the empty string, it returns CAPITALIZE_NONE. 234 public static int getCapitalizationType(final String text) { 235 // If the first char is not uppercase, then the word is either all lower case or 236 // camel case, and in either case we return CAPITALIZE_NONE. 237 final int len = text.length(); 238 int index = 0; 239 for (; index < len; index = text.offsetByCodePoints(index, 1)) { 240 if (Character.isLetter(text.codePointAt(index))) { 241 break; 242 } 243 } 244 if (index == len) return CAPITALIZE_NONE; 245 if (!Character.isUpperCase(text.codePointAt(index))) { 246 return CAPITALIZE_NONE; 247 } 248 int capsCount = 1; 249 int letterCount = 1; 250 for (index = text.offsetByCodePoints(index, 1); index < len; 251 index = text.offsetByCodePoints(index, 1)) { 252 if (1 != capsCount && letterCount != capsCount) break; 253 final int codePoint = text.codePointAt(index); 254 if (Character.isUpperCase(codePoint)) { 255 ++capsCount; 256 ++letterCount; 257 } else if (Character.isLetter(codePoint)) { 258 // We need to discount non-letters since they may not be upper-case, but may 259 // still be part of a word (e.g. single quote or dash, as in "IT'S" or "FULL-TIME") 260 ++letterCount; 261 } 262 } 263 // We know the first char is upper case. So we want to test if either every letter other 264 // than the first is lower case, or if they are all upper case. If the string is exactly 265 // one char long, then we will arrive here with letterCount 1, and this is correct, too. 266 if (1 == capsCount) return CAPITALIZE_FIRST; 267 return (letterCount == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); 268 } 269 270 public static boolean isIdenticalAfterUpcase(final String text) { 271 final int length = text.length(); 272 int i = 0; 273 while (i < length) { 274 final int codePoint = text.codePointAt(i); 275 if (Character.isLetter(codePoint) && !Character.isUpperCase(codePoint)) { 276 return false; 277 } 278 i += Character.charCount(codePoint); 279 } 280 return true; 281 } 282 283 public static boolean isIdenticalAfterDowncase(final String text) { 284 final int length = text.length(); 285 int i = 0; 286 while (i < length) { 287 final int codePoint = text.codePointAt(i); 288 if (Character.isLetter(codePoint) && !Character.isLowerCase(codePoint)) { 289 return false; 290 } 291 i += Character.charCount(codePoint); 292 } 293 return true; 294 } 295 296 public static boolean isIdenticalAfterCapitalizeEachWord(final String text, 297 final int[] sortedSeparators) { 298 boolean needsCapsNext = true; 299 final int len = text.length(); 300 for (int i = 0; i < len; i = text.offsetByCodePoints(i, 1)) { 301 final int codePoint = text.codePointAt(i); 302 if (Character.isLetter(codePoint)) { 303 if ((needsCapsNext && !Character.isUpperCase(codePoint)) 304 || (!needsCapsNext && !Character.isLowerCase(codePoint))) { 305 return false; 306 } 307 } 308 // We need a capital letter next if this is a separator. 309 needsCapsNext = (Arrays.binarySearch(sortedSeparators, codePoint) >= 0); 310 } 311 return true; 312 } 313 314 // TODO: like capitalizeFirst*, this does not work perfectly for Dutch because of the IJ digraph 315 // which should be capitalized together in *some* cases. 316 public static String capitalizeEachWord(final String text, final int[] sortedSeparators, 317 final Locale locale) { 318 final StringBuilder builder = new StringBuilder(); 319 boolean needsCapsNext = true; 320 final int len = text.length(); 321 for (int i = 0; i < len; i = text.offsetByCodePoints(i, 1)) { 322 final String nextChar = text.substring(i, text.offsetByCodePoints(i, 1)); 323 if (needsCapsNext) { 324 builder.append(nextChar.toUpperCase(locale)); 325 } else { 326 builder.append(nextChar.toLowerCase(locale)); 327 } 328 // We need a capital letter next if this is a separator. 329 needsCapsNext = (Arrays.binarySearch(sortedSeparators, nextChar.codePointAt(0)) >= 0); 330 } 331 return builder.toString(); 332 } 333 334 /** 335 * Approximates whether the text before the cursor looks like a URL. 336 * 337 * This is not foolproof, but it should work well in the practice. 338 * Essentially it walks backward from the cursor until it finds something that's not a letter, 339 * digit, or common URL symbol like underscore. If it hasn't found a period yet, then it 340 * does not look like a URL. 341 * If the text: 342 * - starts with www and contains a period 343 * - starts with a slash preceded by either a slash, whitespace, or start-of-string 344 * Then it looks like a URL and we return true. Otherwise, we return false. 345 * 346 * Note: this method is called quite often, and should be fast. 347 * 348 * TODO: This will return that "abc./def" and ".abc/def" look like URLs to keep down the 349 * code complexity, but ideally it should not. It's acceptable for now. 350 */ 351 public static boolean lastPartLooksLikeURL(final CharSequence text) { 352 int i = text.length(); 353 if (0 == i) return false; 354 int wCount = 0; 355 int slashCount = 0; 356 boolean hasSlash = false; 357 boolean hasPeriod = false; 358 int codePoint = 0; 359 while (i > 0) { 360 codePoint = Character.codePointBefore(text, i); 361 if (codePoint < Constants.CODE_PERIOD || codePoint > 'z') { 362 // Handwavy heuristic to see if that's a URL character. Anything between period 363 // and z. This includes all lower- and upper-case ascii letters, period, 364 // underscore, arrobase, question mark, equal sign. It excludes spaces, exclamation 365 // marks, double quotes... 366 // Anything that's not a URL-like character causes us to break from here and 367 // evaluate normally. 368 break; 369 } 370 if (Constants.CODE_PERIOD == codePoint) { 371 hasPeriod = true; 372 } 373 if (Constants.CODE_SLASH == codePoint) { 374 hasSlash = true; 375 if (2 == ++slashCount) { 376 return true; 377 } 378 } else { 379 slashCount = 0; 380 } 381 if ('w' == codePoint) { 382 ++wCount; 383 } else { 384 wCount = 0; 385 } 386 i = Character.offsetByCodePoints(text, i, -1); 387 } 388 // End of the text run. 389 // If it starts with www and includes a period, then it looks like a URL. 390 if (wCount >= 3 && hasPeriod) return true; 391 // If it starts with a slash, and the code point before is whitespace, it looks like an URL. 392 if (1 == slashCount && (0 == i || Character.isWhitespace(codePoint))) return true; 393 // If it has both a period and a slash, it looks like an URL. 394 if (hasPeriod && hasSlash) return true; 395 // Otherwise, it doesn't look like an URL. 396 return false; 397 } 398 399 /** 400 * Examines the string and returns whether we're inside a double quote. 401 * 402 * This is used to decide whether we should put an automatic space before or after a double 403 * quote character. If we're inside a quotation, then we want to close it, so we want a space 404 * after and not before. Otherwise, we want to open the quotation, so we want a space before 405 * and not after. Exception: after a digit, we never want a space because the "inch" or 406 * "minutes" use cases is dominant after digits. 407 * In the practice, we determine whether we are in a quotation or not by finding the previous 408 * double quote character, and looking at whether it's followed by whitespace. If so, that 409 * was a closing quotation mark, so we're not inside a double quote. If it's not followed 410 * by whitespace, then it was an opening quotation mark, and we're inside a quotation. 411 * 412 * @param text the text to examine. 413 * @return whether we're inside a double quote. 414 */ 415 public static boolean isInsideDoubleQuoteOrAfterDigit(final CharSequence text) { 416 int i = text.length(); 417 if (0 == i) return false; 418 int codePoint = Character.codePointBefore(text, i); 419 if (Character.isDigit(codePoint)) return true; 420 int prevCodePoint = 0; 421 while (i > 0) { 422 codePoint = Character.codePointBefore(text, i); 423 if (Constants.CODE_DOUBLE_QUOTE == codePoint) { 424 // If we see a double quote followed by whitespace, then that 425 // was a closing quote. 426 if (Character.isWhitespace(prevCodePoint)) return false; 427 } 428 if (Character.isWhitespace(codePoint) && Constants.CODE_DOUBLE_QUOTE == prevCodePoint) { 429 // If we see a double quote preceded by whitespace, then that 430 // was an opening quote. No need to continue seeking. 431 return true; 432 } 433 i -= Character.charCount(codePoint); 434 prevCodePoint = codePoint; 435 } 436 // We reached the start of text. If the first char is a double quote, then we're inside 437 // a double quote. Otherwise we're not. 438 return Constants.CODE_DOUBLE_QUOTE == codePoint; 439 } 440 441 public static boolean isEmptyStringOrWhiteSpaces(final String s) { 442 final int N = codePointCount(s); 443 for (int i = 0; i < N; ++i) { 444 if (!Character.isWhitespace(s.codePointAt(i))) { 445 return false; 446 } 447 } 448 return true; 449 } 450 451 @UsedForTesting 452 public static String byteArrayToHexString(final byte[] bytes) { 453 if (bytes == null || bytes.length == 0) { 454 return EMPTY_STRING; 455 } 456 final StringBuilder sb = new StringBuilder(); 457 for (byte b : bytes) { 458 sb.append(String.format("%02x", b & 0xff)); 459 } 460 return sb.toString(); 461 } 462 463 /** 464 * Convert hex string to byte array. The string length must be an even number. 465 */ 466 @UsedForTesting 467 public static byte[] hexStringToByteArray(final String hexString) { 468 if (TextUtils.isEmpty(hexString)) { 469 return null; 470 } 471 final int N = hexString.length(); 472 if (N % 2 != 0) { 473 throw new NumberFormatException("Input hex string length must be an even number." 474 + " Length = " + N); 475 } 476 final byte[] bytes = new byte[N / 2]; 477 for (int i = 0; i < N; i += 2) { 478 bytes[i / 2] = (byte) ((Character.digit(hexString.charAt(i), 16) << 4) 479 + Character.digit(hexString.charAt(i + 1), 16)); 480 } 481 return bytes; 482 } 483 484 public static String toUpperCaseOfStringForLocale(final String text, 485 final boolean needsToUpperCase, final Locale locale) { 486 if (text == null || !needsToUpperCase) return text; 487 return text.toUpperCase(locale); 488 } 489 490 public static int toUpperCaseOfCodeForLocale(final int code, final boolean needsToUpperCase, 491 final Locale locale) { 492 if (!Constants.isLetterCode(code) || !needsToUpperCase) return code; 493 final String text = newSingleCodePointString(code); 494 final String casedText = toUpperCaseOfStringForLocale( 495 text, needsToUpperCase, locale); 496 return codePointCount(casedText) == 1 497 ? casedText.codePointAt(0) : CODE_UNSPECIFIED; 498 } 499 500 public static int getTrailingSingleQuotesCount(final CharSequence charSequence) { 501 final int lastIndex = charSequence.length() - 1; 502 int i = lastIndex; 503 while (i >= 0 && charSequence.charAt(i) == Constants.CODE_SINGLE_QUOTE) { 504 --i; 505 } 506 return lastIndex - i; 507 } 508 509 /** 510 * Splits the given {@code charSequence} with at occurrences of the given {@code regex}. 511 * <p> 512 * This is equivalent to 513 * {@code charSequence.toString().split(regex, preserveTrailingEmptySegments ? -1 : 0)} 514 * except that the spans are preserved in the result array. 515 * </p> 516 * @param input the character sequence to be split. 517 * @param regex the regex pattern to be used as the separator. 518 * @param preserveTrailingEmptySegments {@code true} to preserve the trailing empty 519 * segments. Otherwise, trailing empty segments will be removed before being returned. 520 * @return the array which contains the result. All the spans in the {@param input} is 521 * preserved. 522 */ 523 @UsedForTesting 524 public static CharSequence[] split(final CharSequence charSequence, final String regex, 525 final boolean preserveTrailingEmptySegments) { 526 // A short-cut for non-spanned strings. 527 if (!(charSequence instanceof Spanned)) { 528 // -1 means that trailing empty segments will be preserved. 529 return charSequence.toString().split(regex, preserveTrailingEmptySegments ? -1 : 0); 530 } 531 532 // Hereafter, emulate String.split for CharSequence. 533 final ArrayList<CharSequence> sequences = new ArrayList<>(); 534 final Matcher matcher = Pattern.compile(regex).matcher(charSequence); 535 int nextStart = 0; 536 boolean matched = false; 537 while (matcher.find()) { 538 sequences.add(charSequence.subSequence(nextStart, matcher.start())); 539 nextStart = matcher.end(); 540 matched = true; 541 } 542 if (!matched) { 543 // never matched. preserveTrailingEmptySegments is ignored in this case. 544 return new CharSequence[] { charSequence }; 545 } 546 sequences.add(charSequence.subSequence(nextStart, charSequence.length())); 547 if (!preserveTrailingEmptySegments) { 548 for (int i = sequences.size() - 1; i >= 0; --i) { 549 if (!TextUtils.isEmpty(sequences.get(i))) { 550 break; 551 } 552 sequences.remove(i); 553 } 554 } 555 return sequences.toArray(new CharSequence[sequences.size()]); 556 } 557 558 @UsedForTesting 559 public static class Stringizer<E> { 560 public String stringize(final E element) { 561 return element != null ? element.toString() : "null"; 562 } 563 564 @UsedForTesting 565 public final String join(final E[] array) { 566 return joinStringArray(toStringArray(array), null /* delimiter */); 567 } 568 569 @UsedForTesting 570 public final String join(final E[] array, final String delimiter) { 571 return joinStringArray(toStringArray(array), delimiter); 572 } 573 574 protected String[] toStringArray(final E[] array) { 575 final String[] stringArray = new String[array.length]; 576 for (int index = 0; index < array.length; index++) { 577 stringArray[index] = stringize(array[index]); 578 } 579 return stringArray; 580 } 581 582 protected String joinStringArray(final String[] stringArray, final String delimiter) { 583 if (stringArray == null) { 584 return "null"; 585 } 586 if (delimiter == null) { 587 return Arrays.toString(stringArray); 588 } 589 final StringBuilder sb = new StringBuilder(); 590 for (int index = 0; index < stringArray.length; index++) { 591 sb.append(index == 0 ? "[" : delimiter); 592 sb.append(stringArray[index]); 593 } 594 return sb + "]"; 595 } 596 } 597} 598