// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 ********************************************************************************
 * Copyright (C) 2010-2014, Google, International Business Machines Corporation *
 * and others. All Rights Reserved.                                             *
 ********************************************************************************
 */
package com.ibm.icu.lang;


/**
 * A number of utilities for dealing with CharSequences and related classes.
/**
 * A number of utilities for dealing with CharSequences and related classes.
 * For accessing codepoints with a CharSequence, also see
 * <ul>
 * <li>{@link java.lang.Character#codePointAt(CharSequence, int)}</li>
 * <li>{@link java.lang.Character#codePointBefore(CharSequence, int)}</li>
 * <li>{@link java.lang.Character#codePointCount(CharSequence, int, int)}</li>
 * <li>{@link java.lang.Character#charCount(int)}</li>
 * <li>{@link java.lang.Character#offsetByCodePoints(CharSequence, int, int)}</li>
 * <li>{@link java.lang.Character#toChars(int, char[], int)}</li>
 * <li>{@link java.lang.Character#toCodePoint(char, char)}</li>
 * </ul>
 * @author markdavis
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public class CharSequences {
    // TODO
    // compareTo(a, b);
    // compareToIgnoreCase(a, b)
    // contentEquals(a, b)
    // contentEqualsIgnoreCase(a, b)

    // contains(a, b) => indexOf >= 0
    // endsWith(a, b)
    // startsWith(a, b)

    // lastIndexOf(a, b, fromIndex)
    // indexOf(a, ch, fromIndex)
    // lastIndexOf(a, ch, fromIndex);

    // s.trim() => UnicodeSet.trim(CharSequence s); return a subsequence starting with the first
    // character not in the set to the last character not in the set.
    // add UnicodeSet.split(CharSequence s);

    /**
     * Finds the length of the longest common run of chars in {@code a} starting at
     * {@code aIndex} and in {@code b} starting at {@code bIndex}.
     * If the run is non-empty and stops in the middle of a surrogate pair in both
     * sequences, the result is reduced by one so that half a character is not reported
     * as matched.
     *
     * @param a first sequence
     * @param b second sequence
     * @param aIndex start offset in {@code a}
     * @param bIndex start offset in {@code b}
     * @return the number of matching chars (UTF-16 code units)
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex) {
        int i = aIndex;
        int j = bIndex;
        int alen = a.length();
        int blen = b.length();
        while (i < alen && j < blen && a.charAt(i) == b.charAt(j)) {
            ++i;
            ++j;
        }
        // if we failed a match make sure that we didn't match half a character
        int result = i - aIndex;
        if (result != 0 && !onCharacterBoundary(a, i) && !onCharacterBoundary(b, j)) {
            --result; // backup
        }
        return result;
    }

    /**
     * Counts the code point length. Unpaired surrogates count as 1.
     * <p>This method is static: the class cannot be instantiated, so an instance
     * method would be unreachable.
     *
     * @param s the sequence to measure
     * @return the number of code points in {@code s}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int codePointLength(CharSequence s) {
        return Character.codePointCount(s, 0, s.length());
    }

    /**
     * Utility function for comparing a code point to a string without generating a new
     * string.
     *
     * @param codepoint the code point
     * @param other the sequence to compare against; may be null
     * @return true if {@code other} is non-null and consists of exactly {@code codepoint}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final boolean equals(int codepoint, CharSequence other) {
        if (other == null) {
            return false;
        }
        switch (other.length()) {
        case 1:
            return codepoint == other.charAt(0);
        case 2:
            // Two chars can only equal a single code point if it is supplementary.
            return codepoint > 0xFFFF && codepoint == Character.codePointAt(other, 0);
        default:
            return false;
        }
    }

    /**
     * Same as {@link #equals(int, CharSequence)} with the arguments swapped.
     *
     * @param other the sequence to compare against; may be null
     * @param codepoint the code point
     * @return true if {@code other} is non-null and consists of exactly {@code codepoint}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final boolean equals(CharSequence other, int codepoint) {
        return equals(codepoint, other);
    }

    /**
     * Utility to compare a string to a code point.
     * Same results as turning the code point into a string (with the [ugly]
     * new StringBuilder().appendCodePoint(codepoint).toString())
     * and comparing, but much faster (no object creation).
     * Actually, there is one difference; a null compares as less.
     * Note that this (=String) order is UTF-16 order -- *not* code point order.
     *
     * @param string the sequence to compare; null (like the empty sequence) compares as less
     * @param codePoint the code point to compare against
     * @return a negative value, zero, or a positive value if {@code string} is less than,
     *         equal to, or greater than the code point
     * @throws IllegalArgumentException if {@code codePoint} is not a valid code point
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(CharSequence string, int codePoint) {
        if (codePoint < Character.MIN_CODE_POINT || codePoint > Character.MAX_CODE_POINT) {
            throw new IllegalArgumentException("Not a valid code point: " + codePoint);
        }
        // A null is documented to compare as less; treat it like the empty sequence.
        if (string == null) {
            return -1;
        }
        int stringLength = string.length();
        if (stringLength == 0) {
            return -1;
        }
        char firstChar = string.charAt(0);
        int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT;

        if (offset < 0) { // BMP codePoint
            int result = firstChar - codePoint;
            if (result != 0) {
                return result;
            }
            // First char matches: any extra chars make the string greater.
            return stringLength - 1;
        }
        // non BMP: compare against the lead surrogate, then the trail surrogate
        char lead = (char) ((offset >>> 10) + Character.MIN_HIGH_SURROGATE);
        int result = firstChar - lead;
        if (result != 0) {
            return result;
        }
        if (stringLength > 1) {
            char trail = (char) ((offset & 0x3ff) + Character.MIN_LOW_SURROGATE);
            result = string.charAt(1) - trail;
            if (result != 0) {
                return result;
            }
        }
        // Negative if the string is only the lead surrogate, positive if it has extra chars.
        return stringLength - 2;
    }

    /**
     * Utility to compare a code point to a string.
     * Same results as turning the code point into a string and comparing, but much faster
     * (no object creation).
     * Actually, there is one difference; a null compares as less.
     * Note that this (=String) order is UTF-16 order -- *not* code point order.
     *
     * @param codepoint the code point to compare
     * @param a the sequence to compare against
     * @return -1, 0, or 1 if the code point is less than, equal to, or greater than {@code a}
     * @throws IllegalArgumentException if {@code codepoint} is not a valid code point
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(int codepoint, CharSequence a) {
        int result = compare(a, codepoint);
        return result > 0 ? -1 : result < 0 ? 1 : 0; // Reverse the order.
    }

    /**
     * Returns the value of the first code point, if the string is exactly one code point.
     * Otherwise returns Integer.MAX_VALUE.
     *
     * @param s the sequence to inspect
     * @return the single code point, or {@code Integer.MAX_VALUE}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int getSingleCodePoint(CharSequence s) {
        int length = s.length();
        if (length < 1 || length > 2) {
            return Integer.MAX_VALUE;
        }
        int result = Character.codePointAt(s, 0);
        // One char must be a BMP code point; two chars must form one supplementary code point.
        return (result < 0x10000) == (length == 1) ? result : Integer.MAX_VALUE;
    }

    /**
     * Utility function for comparing objects that may be null.
     *
     * @param a first object, may be null
     * @param b second object, may be null
     * @return true if both are null, or both are non-null and {@code a.equals(b)}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final <T extends Object> boolean equals(T a, T b) {
        if (a == null) {
            return b == null;
        }
        return b != null && a.equals(b);
    }

    /**
     * Utility for comparing the contents of CharSequences, char by char (UTF-16 order).
     *
     * @param a first sequence
     * @param b second sequence
     * @return a negative value, zero, or a positive value if {@code a} is less than,
     *         equal to, or greater than {@code b}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(CharSequence a, CharSequence b) {
        int alength = a.length();
        int blength = b.length();
        int min = alength <= blength ? alength : blength;
        for (int i = 0; i < min; ++i) {
            int diff = a.charAt(i) - b.charAt(i);
            if (diff != 0) {
                return diff;
            }
        }
        // Common prefix is identical: the shorter sequence sorts first.
        return alength - blength;
    }

    /**
     * Utility for testing whether two CharSequences have the same contents.
     *
     * @param a first sequence
     * @param b second sequence
     * @return true if both sequences contain the same chars
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static boolean equalsChars(CharSequence a, CharSequence b) {
        // do length test first for fast path
        return a.length() == b.length() && compare(a, b) == 0;
    }

    /**
     * Are we on a character boundary? An offset is not on a boundary only when it lies
     * strictly inside the sequence between a high surrogate and a low surrogate.
     *
     * @param s the sequence
     * @param i the offset to test
     * @return true if {@code i} does not split a surrogate pair
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static boolean onCharacterBoundary(CharSequence s, int i) {
        return i <= 0
                || i >= s.length()
                || !Character.isHighSurrogate(s.charAt(i - 1))
                || !Character.isLowSurrogate(s.charAt(i));
    }

    /**
     * Finds a code point in a string.
     *
     * @param s the sequence to search
     * @param codePoint the code point to look for
     * @return the char offset of the first occurrence, or -1 if there is none
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int indexOf(CharSequence s, int codePoint) {
        int cp;
        for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
            cp = Character.codePointAt(s, i);
            if (cp == codePoint) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Utility function for simplified, more robust loops, such as:
     * <pre>
     *   for (int codePoint : CharSequences.codePoints(string)) {
     *     doSomethingWith(codePoint);
     *   }
     * </pre>
     * Unpaired surrogates are returned as individual values.
     *
     * @param s the sequence to decode
     * @return the code points of {@code s}, in order
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int[] codePoints(CharSequence s) {
        int[] result = new int[s.length()]; // in the vast majority of cases, the length is the same
        int j = 0;
        for (int i = 0; i < s.length(); ++i) {
            char cp = s.charAt(i);
            if (cp >= 0xDC00 && cp <= 0xDFFF && i != 0) { // hand-code for speed
                // Note: j-1 is safe, because j can only be zero if i is zero. But i!=0 in this block.
                // Test the stored int, not a (char) cast of it: an already-merged supplementary
                // code point such as U+1D800 would truncate to 0xD800 and be mistaken for a
                // lead surrogate.
                int last = result[j - 1];
                if (last >= 0xD800 && last <= 0xDBFF) {
                    result[j - 1] = Character.toCodePoint((char) last, cp);
                    continue;
                }
            }
            result[j++] = cp;
        }
        if (j == result.length) {
            return result;
        }
        int[] shortResult = new int[j];
        System.arraycopy(result, 0, shortResult, 0, j);
        return shortResult;
    }

    private CharSequences() {
    }
}