1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 ********************************************************************************
5 * Copyright (C) 2010-2014, Google, International Business Machines Corporation *
6 * and others. All Rights Reserved.                                                 *
7 ********************************************************************************
8 */
9package com.ibm.icu.lang;
10
11
/**
 * A number of utilities for dealing with CharSequences and related classes.
 * All methods are static; the class cannot be instantiated.
 * For accessing codepoints with a CharSequence, also see
 * <ul>
 * <li>{@link java.lang.Character#codePointAt(CharSequence, int)}</li>
 * <li>{@link java.lang.Character#codePointBefore(CharSequence, int)}</li>
 * <li>{@link java.lang.Character#codePointCount(CharSequence, int, int)}</li>
 * <li>{@link java.lang.Character#charCount(int)}</li>
 * <li>{@link java.lang.Character#offsetByCodePoints(CharSequence, int, int)}</li>
 * <li>{@link java.lang.Character#toChars(int, char[], int)}</li>
 * <li>{@link java.lang.Character#toCodePoint(char, char)}</li>
 * </ul>
 * @author markdavis
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public class CharSequences {
    // TODO
    // compareTo(a, b);
    // compareToIgnoreCase(a, b)
    // contentEquals(a, b)
    // contentEqualsIgnoreCase(a, b)

    // contains(a, b) => indexOf >= 0
    // endsWith(a, b)
    // startsWith(a, b)

    // lastIndexOf(a, b, fromIndex)
    // indexOf(a, ch, fromIndex)
    // lastIndexOf(a, ch, fromIndex);

    // s.trim() => UnicodeSet.trim(CharSequence s); return a subsequence starting with the first
    // character not in the set to the last character not in the set.
    // add UnicodeSet.split(CharSequence s);

    /**
     * Find the longest n such that a[aIndex,n] = b[bIndex,n], and n is on a character boundary.
     * The comparison is done char by char (UTF-16 code units). If the common run is non-empty
     * and stops between a high and a low surrogate in <em>both</em> sequences, the count is
     * reduced by one so that half of a surrogate pair is not reported as matched.
     *
     * @param a the first sequence
     * @param b the second sequence
     * @param aIndex the char index in {@code a} at which to start comparing
     * @param bIndex the char index in {@code b} at which to start comparing
     * @return the number of matching chars, counted from the start indexes
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex) {
        final int aLength = a.length();
        final int bLength = b.length();
        int i = aIndex;
        int j = bIndex;
        while (i < aLength && j < bLength && a.charAt(i) == b.charAt(j)) {
            ++i;
            ++j;
        }
        // if we failed a match make sure that we didn't match half a character
        int matched = i - aIndex;
        if (matched != 0 && !onCharacterBoundary(a, i) && !onCharacterBoundary(b, j)) {
            --matched; // backup
        }
        return matched;
    }

    /**
     * Count the code point length. Unpaired surrogates count as 1.
     * <p>
     * This method is static: the class has only a private constructor, so an instance method
     * could never be invoked by callers.
     *
     * @param s the sequence to measure
     * @return the number of code points in {@code s}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int codePointLength(CharSequence s) {
        return Character.codePointCount(s, 0, s.length());
    }

    /**
     * Utility function for comparing codepoint to string without generating new
     * string.
     *
     * @param codepoint the code point to compare
     * @param other the sequence to compare against; may be null
     * @return true if {@code other} is non-null and consists of exactly {@code codepoint}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final boolean equals(int codepoint, CharSequence other) {
        if (other == null) {
            return false;
        }
        switch (other.length()) {
        case 1:
            // A single char can only equal a BMP code point.
            return codepoint == other.charAt(0);
        case 2:
            // Two chars can only equal a supplementary code point (a valid surrogate pair).
            return codepoint > 0xFFFF && codepoint == Character.codePointAt(other, 0);
        default:
            return false;
        }
    }

    /**
     * Same as {@link #equals(int, CharSequence)} with the arguments swapped.
     *
     * @param other the sequence to compare against; may be null
     * @param codepoint the code point to compare
     * @return true if {@code other} is non-null and consists of exactly {@code codepoint}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final boolean equals(CharSequence other, int codepoint) {
        return equals(codepoint, other);
    }

    /**
     * Utility to compare a string to a code point.
     * Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString())
     * and comparing, but much faster (no object creation).
     * Actually, there is one difference; a null compares as less.
     * Note that this (=String) order is UTF-16 order -- *not* code point order.
     *
     * @param string the sequence to compare; a null or empty sequence compares as less
     * @param codePoint the code point to compare against
     * @return a negative value, zero, or a positive value if {@code string} is less than,
     *         equal to, or greater than the code point
     * @throws IllegalArgumentException if {@code codePoint} is not a valid Unicode code point
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(CharSequence string, int codePoint) {
        if (codePoint < Character.MIN_CODE_POINT || codePoint > Character.MAX_CODE_POINT) {
            throw new IllegalArgumentException(
                    "Invalid code point: 0x" + Integer.toHexString(codePoint));
        }
        if (string == null) {
            return -1; // documented contract: a null compares as less
        }
        int stringLength = string.length();
        if (stringLength == 0) {
            return -1;
        }
        char firstChar = string.charAt(0);
        int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT;

        if (offset < 0) { // BMP codePoint
            int result = firstChar - codePoint;
            if (result != 0) {
                return result;
            }
            // First char matches; any extra chars make the string greater.
            return stringLength - 1;
        }
        // non BMP: compare against the lead surrogate, then the trail surrogate
        char lead = (char) ((offset >>> 10) + Character.MIN_HIGH_SURROGATE);
        int result = firstChar - lead;
        if (result != 0) {
            return result;
        }
        if (stringLength > 1) {
            char trail = (char) ((offset & 0x3ff) + Character.MIN_LOW_SURROGATE);
            result = string.charAt(1) - trail;
            if (result != 0) {
                return result;
            }
        }
        // Negative if the string is only the lead surrogate, zero on exact match,
        // positive if more chars follow the pair.
        return stringLength - 2;
    }

    /**
     * Utility to compare a code point to a string.
     * Same results as turning the code point into a string and comparing, but much faster (no object creation).
     * Actually, there is one difference; a null compares as less.
     * Note that this (=String) order is UTF-16 order -- *not* code point order.
     *
     * @param codepoint the code point to compare
     * @param a the sequence to compare against; a null or empty sequence compares as less
     * @return -1, 0, or 1 if the code point is less than, equal to, or greater than {@code a}
     * @throws IllegalArgumentException if {@code codepoint} is not a valid Unicode code point
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(int codepoint, CharSequence a) {
        int result = compare(a, codepoint);
        return result > 0 ? -1 : result < 0 ? 1 : 0; // Reverse the order.
    }

    /**
     * Return the value of the first code point, if the string is exactly one code point. Otherwise return Integer.MAX_VALUE.
     *
     * @param s the sequence to examine
     * @return the single code point, or {@code Integer.MAX_VALUE} if {@code s} is empty or
     *         holds more than one code point
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int getSingleCodePoint(CharSequence s) {
        int length = s.length();
        if (length < 1 || length > 2) {
            return Integer.MAX_VALUE;
        }
        int result = Character.codePointAt(s, 0);
        // One char must be a BMP value; two chars must form a supplementary code point.
        return (result < 0x10000) == (length == 1) ? result : Integer.MAX_VALUE;
    }

    /**
     * Utility function for comparing objects that may be null.
     *
     * @param a the first object; may be null
     * @param b the second object; may be null
     * @return true if both are null, or both are non-null and {@code a.equals(b)}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final <T extends Object> boolean equals(T a, T b) {
        if (a == null) {
            return b == null;
        }
        return b != null && a.equals(b);
    }

    /**
     * Utility for comparing the contents of CharSequences, char by char (UTF-16 order).
     *
     * @param a the first sequence
     * @param b the second sequence
     * @return a negative value, zero, or a positive value if {@code a} is less than,
     *         equal to, or greater than {@code b}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(CharSequence a, CharSequence b) {
        int alength = a.length();
        int blength = b.length();
        int min = Math.min(alength, blength);
        for (int i = 0; i < min; ++i) {
            int diff = a.charAt(i) - b.charAt(i);
            if (diff != 0) {
                return diff;
            }
        }
        // Common prefix is identical; the shorter sequence sorts first.
        return alength - blength;
    }

    /**
     * Utility for comparing the contents of CharSequences for equality.
     *
     * @param a the first sequence
     * @param b the second sequence
     * @return true if both sequences have the same length and the same chars
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static boolean equalsChars(CharSequence a, CharSequence b) {
        // do length test first for fast path
        return a.length() == b.length() && compare(a, b) == 0;
    }

    /**
     * Are we on a character boundary?
     * An index is off a boundary only when it lies strictly inside the sequence, between a
     * high surrogate and the low surrogate that follows it.
     *
     * @param s the sequence to examine
     * @param i the char index to test; values outside the sequence count as boundaries
     * @return true if {@code i} does not split a surrogate pair
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static boolean onCharacterBoundary(CharSequence s, int i) {
        return i <= 0
                || i >= s.length()
                || !Character.isHighSurrogate(s.charAt(i - 1))
                || !Character.isLowSurrogate(s.charAt(i));
    }

    /**
     * Find code point in string.
     *
     * @param s the sequence to search
     * @param codePoint the code point to look for
     * @return the char index of the first occurrence, or -1 if there is none
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int indexOf(CharSequence s, int codePoint) {
        int cp;
        for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
            cp = Character.codePointAt(s, i);
            if (cp == codePoint) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Utility function for simplified, more robust loops, such as:
     * <pre>
     *   for (int codePoint : CharSequences.codePoints(string)) {
     *     doSomethingWith(codePoint);
     *   }
     * </pre>
     * Unpaired surrogates are returned as individual values.
     *
     * @param s the sequence to convert
     * @return a new array holding the code points of {@code s}, in order
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int[] codePoints(CharSequence s) {
        final int length = s.length();
        int[] result = new int[length]; // in the vast majority of cases, the length is the same
        int count = 0;
        for (int i = 0; i < length; ++i) {
            char unit = s.charAt(i);
            // count > 0 exactly when i > 0, so result[count - 1] is safe to read here.
            if (count > 0 && unit >= 0xDC00 && unit <= 0xDFFF) { // hand-code for speed
                // Test the stored int, not a (char) cast of it: an already-combined
                // supplementary code point such as U+1D800 has low 16 bits that look like a
                // high surrogate and must not be merged with a following lone low surrogate.
                int previous = result[count - 1];
                if (previous >= 0xD800 && previous <= 0xDBFF) {
                    result[count - 1] = Character.toCodePoint((char) previous, unit);
                    continue;
                }
            }
            result[count++] = unit;
        }
        if (count == length) {
            return result;
        }
        // At least one surrogate pair was combined; trim the unused tail.
        int[] shortResult = new int[count];
        System.arraycopy(result, 0, shortResult, 0, count);
        return shortResult;
    }

    /** Not instantiable: this class only hosts static utilities. */
    private CharSequences() {
    }
}
313