1/* GENERATED SOURCE. DO NOT MODIFY. */
2/**
3 *******************************************************************************
4 * Copyright (C) 1996-2016, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 *******************************************************************************
7 */
8
9package android.icu.text;
10
11/**
12 * <p>
13 * Standalone utility class providing UTF16 character conversions and indexing conversions.
14 * </p>
15 * <p>
16 * Code that uses strings alone rarely needs modification. By design, UTF-16 does not allow overlap,
17 * so searching for strings is a safe operation. Similarly, concatenation is always safe.
18 * Substringing is safe if the start and end are both on UTF-32 boundaries. In normal code, the
19 * values for start and end are on those boundaries, since they arose from operations like
20 * searching. If not, the nearest UTF-32 boundaries can be determined using <code>bounds()</code>.
21 * </p>
22 * <strong>Examples:</strong>
23 * <p>
24 * The following examples illustrate use of some of these methods.
25 *
26 * <pre>
27 * // iteration forwards: Original
28 * for (int i = 0; i &lt; s.length(); ++i) {
29 *     char ch = s.charAt(i);
30 *     doSomethingWith(ch);
31 * }
32 *
33 * // iteration forwards: Changes for UTF-32
34 * int ch;
35 * for (int i = 0; i &lt; s.length(); i += UTF16.getCharCount(ch)) {
36 *     ch = UTF16.charAt(s, i);
37 *     doSomethingWith(ch);
38 * }
39 *
40 * // iteration backwards: Original
41 * for (int i = s.length() - 1; i &gt;= 0; --i) {
42 *     char ch = s.charAt(i);
43 *     doSomethingWith(ch);
44 * }
45 *
46 * // iteration backwards: Changes for UTF-32
47 * int ch;
48 * for (int i = s.length() - 1; i &gt;= 0; i -= UTF16.getCharCount(ch)) {
49 *     ch = UTF16.charAt(s, i);
50 *     doSomethingWith(ch);
51 * }
52 * </pre>
53 *
54 * <strong>Notes:</strong>
55 * <ul>
56 * <li> <strong>Naming:</strong> For clarity, High and Low surrogates are called <code>Lead</code>
57 * and <code>Trail</code> in the API, which gives a better sense of their ordering in a string.
58 * <code>offset16</code> and <code>offset32</code> are used to distinguish offsets to UTF-16
59 * boundaries vs offsets to UTF-32 boundaries. <code>int char32</code> is used to contain UTF-32
60 * characters, as opposed to <code>char16</code>, which is a UTF-16 code unit. </li>
61 * <li> <strong>Roundtripping Offsets:</strong> You can always roundtrip from a UTF-32 offset to a
62 * UTF-16 offset and back. Because of the difference in structure, you can roundtrip from a UTF-16
63 * offset to a UTF-32 offset and back if and only if <code>bounds(string, offset16) != TRAIL</code>.
64 * </li>
65 * <li> <strong>Exceptions:</strong> The error checking will throw an exception if indices are out
66 * of bounds. Other than that, all methods will behave reasonably, even if unmatched surrogates
67 * or out-of-bounds UTF-32 values are present. <code>UCharacter.isLegal()</code> can be used to
68 * check for validity if desired. </li>
69 * <li> <strong>Unmatched Surrogates:</strong> If the string contains unmatched surrogates, then
70 * these are counted as one UTF-32 value. This matches their iteration behavior, which is vital. It
71 * also matches common display practice as missing glyphs (see the Unicode Standard Section 5.4,
72 * 5.5). </li>
73 * <li> <strong>Optimization:</strong> The method implementations may need optimization if the
74 * compiler doesn't fold static final methods. Since surrogate pairs will form an exceedingly small
75 * percentage of all the text in the world, the singleton case should always be optimized for. </li>
76 * </ul>
77 *
78 * @author Mark Davis, with help from Markus Scherer
79 * @hide Only a subset of ICU is exposed in Android
80 */
81
82public final class UTF16 {
83    // public variables ---------------------------------------------------
84
85    /**
86     * Value returned in {@link #bounds(String, int) bounds()}.
87     * These values are chosen specifically so that it actually represents the position of the
88     * character [offset16 - (value &gt;&gt; 2), offset16 + (value &amp; 3)]
89     */
90    public static final int SINGLE_CHAR_BOUNDARY = 1, LEAD_SURROGATE_BOUNDARY = 2,
91            TRAIL_SURROGATE_BOUNDARY = 5;
92
93    /**
94     * The lowest Unicode code point value.
95     */
96    public static final int CODEPOINT_MIN_VALUE = 0;
97
98    /**
99     * The highest Unicode code point value (scalar value) according to the Unicode Standard.
100     */
101    public static final int CODEPOINT_MAX_VALUE = 0x10ffff;
102
103    /**
104     * The minimum value for Supplementary code points
105     */
106    public static final int SUPPLEMENTARY_MIN_VALUE = 0x10000;
107
108    /**
109     * Lead surrogate minimum value
110     */
111    public static final int LEAD_SURROGATE_MIN_VALUE = 0xD800;
112
113    /**
114     * Trail surrogate minimum value
115     */
116    public static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00;
117
118    /**
119     * Lead surrogate maximum value
120     */
121    public static final int LEAD_SURROGATE_MAX_VALUE = 0xDBFF;
122
123    /**
124     * Trail surrogate maximum value
125     */
126    public static final int TRAIL_SURROGATE_MAX_VALUE = 0xDFFF;
127
128    /**
129     * Surrogate minimum value
130     */
131    public static final int SURROGATE_MIN_VALUE = LEAD_SURROGATE_MIN_VALUE;
132
133    /**
134     * Maximum surrogate value
135     */
136    public static final int SURROGATE_MAX_VALUE = TRAIL_SURROGATE_MAX_VALUE;
137
138    /**
139     * Lead surrogate bitmask
140     */
141    private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00;
142
143    /**
144     * Trail surrogate bitmask
145     */
146    private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00;
147
148    /**
149     * Surrogate bitmask
150     */
151    private static final int SURROGATE_BITMASK = 0xFFFFF800;
152
153    /**
154     * Lead surrogate bits
155     */
156    private static final int LEAD_SURROGATE_BITS = 0xD800;
157
158    /**
159     * Trail surrogate bits
160     */
161    private static final int TRAIL_SURROGATE_BITS = 0xDC00;
162
163    /**
164     * Surrogate bits
165     */
166    private static final int SURROGATE_BITS = 0xD800;
167
168    // constructor --------------------------------------------------------
169
170    // /CLOVER:OFF
171    /**
172     * Prevent instance from being created.
173     */
174    private UTF16() {
175    }
176
177    // /CLOVER:ON
178    // public method ------------------------------------------------------
179
180    /**
181     * Extract a single UTF-32 value from a string. Used when iterating forwards or backwards (with
182     * <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is
183     * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">
184     * UCharacter.isLegal()</a></code>
185     * on the return value. If the char retrieved is part of a surrogate pair, its supplementary
186     * character will be returned. If a complete supplementary character is not found the incomplete
187     * character will be returned
188     *
189     * @param source Array of UTF-16 chars
190     * @param offset16 UTF-16 offset to the start of the character.
191     * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries
192     *         of that codepoint are the same as in <code>bounds32()</code>.
193     * @exception IndexOutOfBoundsException Thrown if offset16 is out of bounds.
194     */
195    public static int charAt(String source, int offset16) {
196        char single = source.charAt(offset16);
197        if (single < LEAD_SURROGATE_MIN_VALUE) {
198            return single;
199        }
200        return _charAt(source, offset16, single);
201    }
202
203    private static int _charAt(String source, int offset16, char single) {
204        if (single > TRAIL_SURROGATE_MAX_VALUE) {
205            return single;
206        }
207
208        // Convert the UTF-16 surrogate pair if necessary.
209        // For simplicity in usage, and because the frequency of pairs is
210        // low, look both directions.
211
212        if (single <= LEAD_SURROGATE_MAX_VALUE) {
213            ++offset16;
214            if (source.length() != offset16) {
215                char trail = source.charAt(offset16);
216                if (trail >= TRAIL_SURROGATE_MIN_VALUE && trail <= TRAIL_SURROGATE_MAX_VALUE) {
217                    return Character.toCodePoint(single, trail);
218                }
219            }
220        } else {
221            --offset16;
222            if (offset16 >= 0) {
223                // single is a trail surrogate so
224                char lead = source.charAt(offset16);
225                if (lead >= LEAD_SURROGATE_MIN_VALUE && lead <= LEAD_SURROGATE_MAX_VALUE) {
226                    return Character.toCodePoint(lead, single);
227                }
228            }
229        }
230        return single; // return unmatched surrogate
231    }
232
233    /**
234     * Extract a single UTF-32 value from a string. Used when iterating forwards or backwards (with
235     * <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is
236     * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">
237     * UCharacter.isLegal()</a></code>
238     * on the return value. If the char retrieved is part of a surrogate pair, its supplementary
239     * character will be returned. If a complete supplementary character is not found the incomplete
240     * character will be returned
241     *
242     * @param source Array of UTF-16 chars
243     * @param offset16 UTF-16 offset to the start of the character.
244     * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries
245     *         of that codepoint are the same as in <code>bounds32()</code>.
246     * @exception IndexOutOfBoundsException Thrown if offset16 is out of bounds.
247     */
248    public static int charAt(CharSequence source, int offset16) {
249        char single = source.charAt(offset16);
250        if (single < UTF16.LEAD_SURROGATE_MIN_VALUE) {
251            return single;
252        }
253        return _charAt(source, offset16, single);
254    }
255
256    private static int _charAt(CharSequence source, int offset16, char single) {
257        if (single > UTF16.TRAIL_SURROGATE_MAX_VALUE) {
258            return single;
259        }
260
261        // Convert the UTF-16 surrogate pair if necessary.
262        // For simplicity in usage, and because the frequency of pairs is
263        // low, look both directions.
264
265        if (single <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
266            ++offset16;
267            if (source.length() != offset16) {
268                char trail = source.charAt(offset16);
269                if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE
270                        && trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
271                    return Character.toCodePoint(single, trail);
272                }
273            }
274        } else {
275            --offset16;
276            if (offset16 >= 0) {
277                // single is a trail surrogate so
278                char lead = source.charAt(offset16);
279                if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE
280                        && lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
281                    return Character.toCodePoint(lead, single);
282                }
283            }
284        }
285        return single; // return unmatched surrogate
286    }
287
288    /**
289     * Extract a single UTF-32 value from a string. Used when iterating forwards or backwards (with
290     * <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is
291     * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal()
292     * </a></code>
293     * on the return value. If the char retrieved is part of a surrogate pair, its supplementary
294     * character will be returned. If a complete supplementary character is not found the incomplete
295     * character will be returned
296     *
297     * @param source UTF-16 chars string buffer
298     * @param offset16 UTF-16 offset to the start of the character.
299     * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries
300     *         of that codepoint are the same as in <code>bounds32()</code>.
301     * @exception IndexOutOfBoundsException Thrown if offset16 is out of bounds.
302     */
303    public static int charAt(StringBuffer source, int offset16) {
304        if (offset16 < 0 || offset16 >= source.length()) {
305            throw new StringIndexOutOfBoundsException(offset16);
306        }
307
308        char single = source.charAt(offset16);
309        if (!isSurrogate(single)) {
310            return single;
311        }
312
313        // Convert the UTF-16 surrogate pair if necessary.
314        // For simplicity in usage, and because the frequency of pairs is
315        // low, look both directions.
316
317        if (single <= LEAD_SURROGATE_MAX_VALUE) {
318            ++offset16;
319            if (source.length() != offset16) {
320                char trail = source.charAt(offset16);
321                if (isTrailSurrogate(trail))
322                    return Character.toCodePoint(single, trail);
323            }
324        } else {
325            --offset16;
326            if (offset16 >= 0) {
327                // single is a trail surrogate so
328                char lead = source.charAt(offset16);
329                if (isLeadSurrogate(lead)) {
330                    return Character.toCodePoint(lead, single);
331                }
332            }
333        }
334        return single; // return unmatched surrogate
335    }
336
337    /**
338     * Extract a single UTF-32 value from a substring. Used when iterating forwards or backwards
339     * (with <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is
340     * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal()
341     * </a></code>
342     * on the return value. If the char retrieved is part of a surrogate pair, its supplementary
343     * character will be returned. If a complete supplementary character is not found the incomplete
344     * character will be returned
345     *
346     * @param source Array of UTF-16 chars
347     * @param start Offset to substring in the source array for analyzing
348     * @param limit Offset to substring in the source array for analyzing
349     * @param offset16 UTF-16 offset relative to start
350     * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries
351     *         of that codepoint are the same as in <code>bounds32()</code>.
352     * @exception IndexOutOfBoundsException Thrown if offset16 is not within the range of start and limit.
353     */
354    public static int charAt(char source[], int start, int limit, int offset16) {
355        offset16 += start;
356        if (offset16 < start || offset16 >= limit) {
357            throw new ArrayIndexOutOfBoundsException(offset16);
358        }
359
360        char single = source[offset16];
361        if (!isSurrogate(single)) {
362            return single;
363        }
364
365        // Convert the UTF-16 surrogate pair if necessary.
366        // For simplicity in usage, and because the frequency of pairs is
367        // low, look both directions.
368        if (single <= LEAD_SURROGATE_MAX_VALUE) {
369            offset16++;
370            if (offset16 >= limit) {
371                return single;
372            }
373            char trail = source[offset16];
374            if (isTrailSurrogate(trail)) {
375                return Character.toCodePoint(single, trail);
376            }
377        } else { // isTrailSurrogate(single), so
378            if (offset16 == start) {
379                return single;
380            }
381            offset16--;
382            char lead = source[offset16];
383            if (isLeadSurrogate(lead))
384                return Character.toCodePoint(lead, single);
385        }
386        return single; // return unmatched surrogate
387    }
388
389    /**
390     * Extract a single UTF-32 value from a string. Used when iterating forwards or backwards (with
391     * <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is
392     * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal()
393     * </a></code>
394     * on the return value. If the char retrieved is part of a surrogate pair, its supplementary
395     * character will be returned. If a complete supplementary character is not found the incomplete
396     * character will be returned
397     *
398     * @param source UTF-16 chars string buffer
399     * @param offset16 UTF-16 offset to the start of the character.
400     * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries
401     *         of that codepoint are the same as in <code>bounds32()</code>.
402     * @exception IndexOutOfBoundsException Thrown if offset16 is out of bounds.
403     */
404    public static int charAt(Replaceable source, int offset16) {
405        if (offset16 < 0 || offset16 >= source.length()) {
406            throw new StringIndexOutOfBoundsException(offset16);
407        }
408
409        char single = source.charAt(offset16);
410        if (!isSurrogate(single)) {
411            return single;
412        }
413
414        // Convert the UTF-16 surrogate pair if necessary.
415        // For simplicity in usage, and because the frequency of pairs is
416        // low, look both directions.
417
418        if (single <= LEAD_SURROGATE_MAX_VALUE) {
419            ++offset16;
420            if (source.length() != offset16) {
421                char trail = source.charAt(offset16);
422                if (isTrailSurrogate(trail))
423                    return Character.toCodePoint(single, trail);
424            }
425        } else {
426            --offset16;
427            if (offset16 >= 0) {
428                // single is a trail surrogate so
429                char lead = source.charAt(offset16);
430                if (isLeadSurrogate(lead)) {
431                    return Character.toCodePoint(lead, single);
432                }
433            }
434        }
435        return single; // return unmatched surrogate
436    }
437
438    /**
439     * Determines how many chars this char32 requires. If a validity check is required, use <code>
440     * <a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code>
441     * on char32 before calling.
442     *
443     * @param char32 The input codepoint.
444     * @return 2 if is in supplementary space, otherwise 1.
445     */
446    public static int getCharCount(int char32) {
447        if (char32 < SUPPLEMENTARY_MIN_VALUE) {
448            return 1;
449        }
450        return 2;
451    }
452
453    /**
454     * Returns the type of the boundaries around the char at offset16. Used for random access.
455     *
456     * @param source Text to analyse
457     * @param offset16 UTF-16 offset
458     * @return
459     *            <ul>
460     *            <li> SINGLE_CHAR_BOUNDARY : a single char; the bounds are [offset16, offset16+1]
461     *            <li> LEAD_SURROGATE_BOUNDARY : a surrogate pair starting at offset16; the bounds
462     *            are [offset16, offset16 + 2]
463     *            <li> TRAIL_SURROGATE_BOUNDARY : a surrogate pair starting at offset16 - 1; the
464     *            bounds are [offset16 - 1, offset16 + 1]
465     *            </ul>
466     *            For bit-twiddlers, the return values for these are chosen so that the boundaries
467     *            can be gotten by: [offset16 - (value &gt;&gt; 2), offset16 + (value &amp; 3)].
468     * @exception IndexOutOfBoundsException If offset16 is out of bounds.
469     */
470    public static int bounds(String source, int offset16) {
471        char ch = source.charAt(offset16);
472        if (isSurrogate(ch)) {
473            if (isLeadSurrogate(ch)) {
474                if (++offset16 < source.length() && isTrailSurrogate(source.charAt(offset16))) {
475                    return LEAD_SURROGATE_BOUNDARY;
476                }
477            } else {
478                // isTrailSurrogate(ch), so
479                --offset16;
480                if (offset16 >= 0 && isLeadSurrogate(source.charAt(offset16))) {
481                    return TRAIL_SURROGATE_BOUNDARY;
482                }
483            }
484        }
485        return SINGLE_CHAR_BOUNDARY;
486    }
487
488    /**
489     * Returns the type of the boundaries around the char at offset16. Used for random access.
490     *
491     * @param source String buffer to analyse
492     * @param offset16 UTF16 offset
493     * @return
494     *            <ul>
495     *            <li> SINGLE_CHAR_BOUNDARY : a single char; the bounds are [offset16, offset16 + 1]
496     *            <li> LEAD_SURROGATE_BOUNDARY : a surrogate pair starting at offset16; the bounds
497     *            are [offset16, offset16 + 2]
498     *            <li> TRAIL_SURROGATE_BOUNDARY : a surrogate pair starting at offset16 - 1; the
499     *            bounds are [offset16 - 1, offset16 + 1]
500     *            </ul>
501     *            For bit-twiddlers, the return values for these are chosen so that the boundaries
502     *            can be gotten by: [offset16 - (value &gt;&gt; 2), offset16 + (value &amp; 3)].
503     * @exception IndexOutOfBoundsException If offset16 is out of bounds.
504     */
505    public static int bounds(StringBuffer source, int offset16) {
506        char ch = source.charAt(offset16);
507        if (isSurrogate(ch)) {
508            if (isLeadSurrogate(ch)) {
509                if (++offset16 < source.length() && isTrailSurrogate(source.charAt(offset16))) {
510                    return LEAD_SURROGATE_BOUNDARY;
511                }
512            } else {
513                // isTrailSurrogate(ch), so
514                --offset16;
515                if (offset16 >= 0 && isLeadSurrogate(source.charAt(offset16))) {
516                    return TRAIL_SURROGATE_BOUNDARY;
517                }
518            }
519        }
520        return SINGLE_CHAR_BOUNDARY;
521    }
522
523    /**
524     * Returns the type of the boundaries around the char at offset16. Used for random access. Note
525     * that the boundaries are determined with respect to the subarray, hence the char array
526     * {0xD800, 0xDC00} has the result SINGLE_CHAR_BOUNDARY for start = offset16 = 0 and limit = 1.
527     *
528     * @param source Char array to analyse
529     * @param start Offset to substring in the source array for analyzing
530     * @param limit Offset to substring in the source array for analyzing
531     * @param offset16 UTF16 offset relative to start
532     * @return
533     *            <ul>
534     *            <li> SINGLE_CHAR_BOUNDARY : a single char; the bounds are
535     *            <li> LEAD_SURROGATE_BOUNDARY : a surrogate pair starting at offset16; the bounds
536     *            are [offset16, offset16 + 2]
537     *            <li> TRAIL_SURROGATE_BOUNDARY : a surrogate pair starting at offset16 - 1; the
538     *            bounds are [offset16 - 1, offset16 + 1]
539     *            </ul>
540     *            For bit-twiddlers, the boundary values for these are chosen so that the boundaries
541     *            can be gotten by: [offset16 - (boundvalue &gt;&gt; 2), offset16 + (boundvalue &amp; 3)].
542     * @exception IndexOutOfBoundsException If offset16 is not within the range of start and limit.
543     */
544    public static int bounds(char source[], int start, int limit, int offset16) {
545        offset16 += start;
546        if (offset16 < start || offset16 >= limit) {
547            throw new ArrayIndexOutOfBoundsException(offset16);
548        }
549        char ch = source[offset16];
550        if (isSurrogate(ch)) {
551            if (isLeadSurrogate(ch)) {
552                ++offset16;
553                if (offset16 < limit && isTrailSurrogate(source[offset16])) {
554                    return LEAD_SURROGATE_BOUNDARY;
555                }
556            } else { // isTrailSurrogate(ch), so
557                --offset16;
558                if (offset16 >= start && isLeadSurrogate(source[offset16])) {
559                    return TRAIL_SURROGATE_BOUNDARY;
560                }
561            }
562        }
563        return SINGLE_CHAR_BOUNDARY;
564    }
565
566    /**
567     * Determines whether the code value is a surrogate.
568     *
569     * @param char16 The input character.
570     * @return true If the input character is a surrogate.
571     */
572    public static boolean isSurrogate(char char16) {
573        return (char16 & SURROGATE_BITMASK) == SURROGATE_BITS;
574    }
575
576    /**
577     * Determines whether the character is a trail surrogate.
578     *
579     * @param char16 The input character.
580     * @return true If the input character is a trail surrogate.
581     */
582    public static boolean isTrailSurrogate(char char16) {
583        return (char16 & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
584    }
585
586    /**
587     * Determines whether the character is a lead surrogate.
588     *
589     * @param char16 The input character.
590     * @return true If the input character is a lead surrogate
591     */
592    public static boolean isLeadSurrogate(char char16) {
593        return (char16 & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
594    }
595
596    /**
597     * Returns the lead surrogate. If a validity check is required, use
598     * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on char32
599     * before calling.
600     *
601     * @param char32 The input character.
602     * @return lead surrogate if the getCharCount(ch) is 2; <br>
603     *         and 0 otherwise (note: 0 is not a valid lead surrogate).
604     */
605    public static char getLeadSurrogate(int char32) {
606        if (char32 >= SUPPLEMENTARY_MIN_VALUE) {
607            return (char) (LEAD_SURROGATE_OFFSET_ + (char32 >> LEAD_SURROGATE_SHIFT_));
608        }
609        return 0;
610    }
611
612    /**
613     * Returns the trail surrogate. If a validity check is required, use
614     * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on char32
615     * before calling.
616     *
617     * @param char32 The input character.
618     * @return the trail surrogate if the getCharCount(ch) is 2; <br>
619     *         otherwise the character itself
620     */
621    public static char getTrailSurrogate(int char32) {
622        if (char32 >= SUPPLEMENTARY_MIN_VALUE) {
623            return (char) (TRAIL_SURROGATE_MIN_VALUE + (char32 & TRAIL_SURROGATE_MASK_));
624        }
625        return (char) char32;
626    }
627
628    /**
629     * Convenience method corresponding to String.valueOf(char). Returns a one or two char string
630     * containing the UTF-32 value in UTF16 format. If a validity check is required, use
631     * {@link android.icu.lang.UCharacter#isLegal(int)} on char32 before calling.
632     *
633     * @param char32 The input character.
634     * @return string value of char32 in UTF16 format
635     * @exception IllegalArgumentException Thrown if char32 is a invalid codepoint.
636     */
637    public static String valueOf(int char32) {
638        if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
639            throw new IllegalArgumentException("Illegal codepoint");
640        }
641        return toString(char32);
642    }
643
644    /**
645     * Convenience method corresponding to String.valueOf(codepoint at offset16). Returns a one or
646     * two char string containing the UTF-32 value in UTF16 format. If offset16 indexes a surrogate
647     * character, the whole supplementary codepoint will be returned. If a validity check is
648     * required, use {@link android.icu.lang.UCharacter#isLegal(int)} on the
649     * codepoint at offset16 before calling. The result returned will be a newly created String
650     * obtained by calling source.substring(..) with the appropriate indexes.
651     *
652     * @param source The input string.
653     * @param offset16 The UTF16 index to the codepoint in source
654     * @return string value of char32 in UTF16 format
655     */
656    public static String valueOf(String source, int offset16) {
657        switch (bounds(source, offset16)) {
658        case LEAD_SURROGATE_BOUNDARY:
659            return source.substring(offset16, offset16 + 2);
660        case TRAIL_SURROGATE_BOUNDARY:
661            return source.substring(offset16 - 1, offset16 + 1);
662        default:
663            return source.substring(offset16, offset16 + 1);
664        }
665    }
666
667    /**
668     * Convenience method corresponding to StringBuffer.valueOf(codepoint at offset16). Returns a
669     * one or two char string containing the UTF-32 value in UTF16 format. If offset16 indexes a
670     * surrogate character, the whole supplementary codepoint will be returned. If a validity check
671     * is required, use {@link android.icu.lang.UCharacter#isLegal(int)} on
672     * the codepoint at offset16 before calling. The result returned will be a newly created String
673     * obtained by calling source.substring(..) with the appropriate indexes.
674     *
675     * @param source The input string buffer.
676     * @param offset16 The UTF16 index to the codepoint in source
677     * @return string value of char32 in UTF16 format
678     */
679    public static String valueOf(StringBuffer source, int offset16) {
680        switch (bounds(source, offset16)) {
681        case LEAD_SURROGATE_BOUNDARY:
682            return source.substring(offset16, offset16 + 2);
683        case TRAIL_SURROGATE_BOUNDARY:
684            return source.substring(offset16 - 1, offset16 + 1);
685        default:
686            return source.substring(offset16, offset16 + 1);
687        }
688    }
689
690    /**
691     * Convenience method. Returns a one or two char string containing the UTF-32 value in UTF16
692     * format. If offset16 indexes a surrogate character, the whole supplementary codepoint will be
693     * returned, except when either the leading or trailing surrogate character lies out of the
694     * specified subarray. In the latter case, only the surrogate character within bounds will be
695     * returned. If a validity check is required, use
696     * {@link android.icu.lang.UCharacter#isLegal(int)} on the codepoint at
697     * offset16 before calling. The result returned will be a newly created String containing the
698     * relevant characters.
699     *
700     * @param source The input char array.
701     * @param start Start index of the subarray
702     * @param limit End index of the subarray
703     * @param offset16 The UTF16 index to the codepoint in source relative to start
704     * @return string value of char32 in UTF16 format
705     */
706    public static String valueOf(char source[], int start, int limit, int offset16) {
707        switch (bounds(source, start, limit, offset16)) {
708        case LEAD_SURROGATE_BOUNDARY:
709            return new String(source, start + offset16, 2);
710        case TRAIL_SURROGATE_BOUNDARY:
711            return new String(source, start + offset16 - 1, 2);
712        }
713        return new String(source, start + offset16, 1);
714    }
715
716    /**
717     * Returns the UTF-16 offset that corresponds to a UTF-32 offset. Used for random access. See
718     * the {@link UTF16 class description} for notes on roundtripping.
719     *
720     * @param source The UTF-16 string
721     * @param offset32 UTF-32 offset
722     * @return UTF-16 offset
723     * @exception IndexOutOfBoundsException If offset32 is out of bounds.
724     */
725    public static int findOffsetFromCodePoint(String source, int offset32) {
726        char ch;
727        int size = source.length(), result = 0, count = offset32;
728        if (offset32 < 0 || offset32 > size) {
729            throw new StringIndexOutOfBoundsException(offset32);
730        }
731        while (result < size && count > 0) {
732            ch = source.charAt(result);
733            if (isLeadSurrogate(ch) && ((result + 1) < size)
734                    && isTrailSurrogate(source.charAt(result + 1))) {
735                result++;
736            }
737
738            count--;
739            result++;
740        }
741        if (count != 0) {
742            throw new StringIndexOutOfBoundsException(offset32);
743        }
744        return result;
745    }
746
747    /**
748     * Returns the UTF-16 offset that corresponds to a UTF-32 offset. Used for random access. See
749     * the {@link UTF16 class description} for notes on roundtripping.
750     *
751     * @param source The UTF-16 string buffer
752     * @param offset32 UTF-32 offset
753     * @return UTF-16 offset
754     * @exception IndexOutOfBoundsException If offset32 is out of bounds.
755     */
756    public static int findOffsetFromCodePoint(StringBuffer source, int offset32) {
757        char ch;
758        int size = source.length(), result = 0, count = offset32;
759        if (offset32 < 0 || offset32 > size) {
760            throw new StringIndexOutOfBoundsException(offset32);
761        }
762        while (result < size && count > 0) {
763            ch = source.charAt(result);
764            if (isLeadSurrogate(ch) && ((result + 1) < size)
765                    && isTrailSurrogate(source.charAt(result + 1))) {
766                result++;
767            }
768
769            count--;
770            result++;
771        }
772        if (count != 0) {
773            throw new StringIndexOutOfBoundsException(offset32);
774        }
775        return result;
776    }
777
778    /**
779     * Returns the UTF-16 offset that corresponds to a UTF-32 offset. Used for random access. See
780     * the {@link UTF16 class description} for notes on roundtripping.
781     *
782     * @param source The UTF-16 char array whose substring is to be analysed
783     * @param start Offset of the substring to be analysed
784     * @param limit Offset of the substring to be analysed
785     * @param offset32 UTF-32 offset relative to start
786     * @return UTF-16 offset relative to start
787     * @exception IndexOutOfBoundsException If offset32 is out of bounds.
788     */
789    public static int findOffsetFromCodePoint(char source[], int start, int limit, int offset32) {
790        char ch;
791        int result = start, count = offset32;
792        if (offset32 > limit - start) {
793            throw new ArrayIndexOutOfBoundsException(offset32);
794        }
795        while (result < limit && count > 0) {
796            ch = source[result];
797            if (isLeadSurrogate(ch) && ((result + 1) < limit)
798                    && isTrailSurrogate(source[result + 1])) {
799                result++;
800            }
801
802            count--;
803            result++;
804        }
805        if (count != 0) {
806            throw new ArrayIndexOutOfBoundsException(offset32);
807        }
808        return result - start;
809    }
810
811    /**
812     * Returns the UTF-32 offset corresponding to the first UTF-32 boundary at or after the given
813     * UTF-16 offset. Used for random access. See the {@link UTF16 class description} for
814     * notes on roundtripping.<br>
815     * <i>Note: If the UTF-16 offset is into the middle of a surrogate pair, then the UTF-32 offset
816     * of the <strong>lead</strong> of the pair is returned. </i>
817     * <p>
818     * To find the UTF-32 length of a string, use:
819     *
820     * <pre>
821     * len32 = countCodePoint(source, source.length());
822     * </pre>
823     *
824     * @param source Text to analyse
825     * @param offset16 UTF-16 offset &lt; source text length.
826     * @return UTF-32 offset
827     * @exception IndexOutOfBoundsException If offset16 is out of bounds.
828     */
829    public static int findCodePointOffset(String source, int offset16) {
830        if (offset16 < 0 || offset16 > source.length()) {
831            throw new StringIndexOutOfBoundsException(offset16);
832        }
833
834        int result = 0;
835        char ch;
836        boolean hadLeadSurrogate = false;
837
838        for (int i = 0; i < offset16; ++i) {
839            ch = source.charAt(i);
840            if (hadLeadSurrogate && isTrailSurrogate(ch)) {
841                hadLeadSurrogate = false; // count valid trail as zero
842            } else {
843                hadLeadSurrogate = isLeadSurrogate(ch);
844                ++result; // count others as 1
845            }
846        }
847
848        if (offset16 == source.length()) {
849            return result;
850        }
851
852        // end of source being the less significant surrogate character
853        // shift result back to the start of the supplementary character
854        if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) {
855            result--;
856        }
857
858        return result;
859    }
860
861    /**
862     * Returns the UTF-32 offset corresponding to the first UTF-32 boundary at the given UTF-16
863     * offset. Used for random access. See the {@link UTF16 class description} for notes on
864     * roundtripping.<br>
865     * <i>Note: If the UTF-16 offset is into the middle of a surrogate pair, then the UTF-32 offset
866     * of the <strong>lead</strong> of the pair is returned. </i>
867     * <p>
868     * To find the UTF-32 length of a string, use:
869     *
870     * <pre>
871     * len32 = countCodePoint(source);
872     * </pre>
873     *
874     * @param source Text to analyse
875     * @param offset16 UTF-16 offset &lt; source text length.
876     * @return UTF-32 offset
877     * @exception IndexOutOfBoundsException If offset16 is out of bounds.
878     */
879    public static int findCodePointOffset(StringBuffer source, int offset16) {
880        if (offset16 < 0 || offset16 > source.length()) {
881            throw new StringIndexOutOfBoundsException(offset16);
882        }
883
884        int result = 0;
885        char ch;
886        boolean hadLeadSurrogate = false;
887
888        for (int i = 0; i < offset16; ++i) {
889            ch = source.charAt(i);
890            if (hadLeadSurrogate && isTrailSurrogate(ch)) {
891                hadLeadSurrogate = false; // count valid trail as zero
892            } else {
893                hadLeadSurrogate = isLeadSurrogate(ch);
894                ++result; // count others as 1
895            }
896        }
897
898        if (offset16 == source.length()) {
899            return result;
900        }
901
902        // end of source being the less significant surrogate character
903        // shift result back to the start of the supplementary character
904        if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) {
905            result--;
906        }
907
908        return result;
909    }
910
911    /**
912     * Returns the UTF-32 offset corresponding to the first UTF-32 boundary at the given UTF-16
913     * offset. Used for random access. See the {@link UTF16 class description} for notes on
914     * roundtripping.<br>
915     * <i>Note: If the UTF-16 offset is into the middle of a surrogate pair, then the UTF-32 offset
916     * of the <strong>lead</strong> of the pair is returned. </i>
917     * <p>
918     * To find the UTF-32 length of a substring, use:
919     *
920     * <pre>
921     * len32 = countCodePoint(source, start, limit);
922     * </pre>
923     *
924     * @param source Text to analyse
925     * @param start Offset of the substring
926     * @param limit Offset of the substring
927     * @param offset16 UTF-16 relative to start
928     * @return UTF-32 offset relative to start
929     * @exception IndexOutOfBoundsException If offset16 is not within the range of start and limit.
930     */
931    public static int findCodePointOffset(char source[], int start, int limit, int offset16) {
932        offset16 += start;
933        if (offset16 > limit) {
934            throw new StringIndexOutOfBoundsException(offset16);
935        }
936
937        int result = 0;
938        char ch;
939        boolean hadLeadSurrogate = false;
940
941        for (int i = start; i < offset16; ++i) {
942            ch = source[i];
943            if (hadLeadSurrogate && isTrailSurrogate(ch)) {
944                hadLeadSurrogate = false; // count valid trail as zero
945            } else {
946                hadLeadSurrogate = isLeadSurrogate(ch);
947                ++result; // count others as 1
948            }
949        }
950
951        if (offset16 == limit) {
952            return result;
953        }
954
955        // end of source being the less significant surrogate character
956        // shift result back to the start of the supplementary character
957        if (hadLeadSurrogate && (isTrailSurrogate(source[offset16]))) {
958            result--;
959        }
960
961        return result;
962    }
963
964    /**
965     * Append a single UTF-32 value to the end of a StringBuffer. If a validity check is required,
966     * use {@link android.icu.lang.UCharacter#isLegal(int)} on char32 before
967     * calling.
968     *
969     * @param target The buffer to append to
970     * @param char32 Value to append.
971     * @return the updated StringBuffer
972     * @exception IllegalArgumentException Thrown when char32 does not lie within the range of the Unicode codepoints
973     */
974    public static StringBuffer append(StringBuffer target, int char32) {
975        // Check for irregular values
976        if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
977            throw new IllegalArgumentException("Illegal codepoint: " + Integer.toHexString(char32));
978        }
979
980        // Write the UTF-16 values
981        if (char32 >= SUPPLEMENTARY_MIN_VALUE) {
982            target.append(getLeadSurrogate(char32));
983            target.append(getTrailSurrogate(char32));
984        } else {
985            target.append((char) char32);
986        }
987        return target;
988    }
989
990    /**
991     * Cover JDK 1.5 APIs. Append the code point to the buffer and return the buffer as a
992     * convenience.
993     *
994     * @param target The buffer to append to
995     * @param cp The code point to append
996     * @return the updated StringBuffer
997     * @throws IllegalArgumentException If cp is not a valid code point
998     */
999    public static StringBuffer appendCodePoint(StringBuffer target, int cp) {
1000        return append(target, cp);
1001    }
1002
1003    /**
1004     * Adds a codepoint to offset16 position of the argument char array.
1005     *
1006     * @param target Char array to be append with the new code point
1007     * @param limit UTF16 offset which the codepoint will be appended.
1008     * @param char32 Code point to be appended
1009     * @return offset after char32 in the array.
1010     * @exception IllegalArgumentException Thrown if there is not enough space for the append, or when char32 does not
1011     *                lie within the range of the Unicode codepoints.
1012     */
1013    public static int append(char[] target, int limit, int char32) {
1014        // Check for irregular values
1015        if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
1016            throw new IllegalArgumentException("Illegal codepoint");
1017        }
1018        // Write the UTF-16 values
1019        if (char32 >= SUPPLEMENTARY_MIN_VALUE) {
1020            target[limit++] = getLeadSurrogate(char32);
1021            target[limit++] = getTrailSurrogate(char32);
1022        } else {
1023            target[limit++] = (char) char32;
1024        }
1025        return limit;
1026    }
1027
1028    /**
1029     * Number of codepoints in a UTF16 String
1030     *
1031     * @param source UTF16 string
1032     * @return number of codepoint in string
1033     */
1034    public static int countCodePoint(String source) {
1035        if (source == null || source.length() == 0) {
1036            return 0;
1037        }
1038        return findCodePointOffset(source, source.length());
1039    }
1040
1041    /**
1042     * Number of codepoints in a UTF16 String buffer
1043     *
1044     * @param source UTF16 string buffer
1045     * @return number of codepoint in string
1046     */
1047    public static int countCodePoint(StringBuffer source) {
1048        if (source == null || source.length() == 0) {
1049            return 0;
1050        }
1051        return findCodePointOffset(source, source.length());
1052    }
1053
1054    /**
1055     * Number of codepoints in a UTF16 char array substring
1056     *
1057     * @param source UTF16 char array
1058     * @param start Offset of the substring
1059     * @param limit Offset of the substring
1060     * @return number of codepoint in the substring
1061     * @exception IndexOutOfBoundsException If start and limit are not valid.
1062     */
1063    public static int countCodePoint(char source[], int start, int limit) {
1064        if (source == null || source.length == 0) {
1065            return 0;
1066        }
1067        return findCodePointOffset(source, start, limit, limit - start);
1068    }
1069
1070    /**
1071     * Set a code point into a UTF16 position. Adjusts target according if we are replacing a
1072     * non-supplementary codepoint with a supplementary and vice versa.
1073     *
1074     * @param target Stringbuffer
1075     * @param offset16 UTF16 position to insert into
1076     * @param char32 Code point
1077     */
1078    public static void setCharAt(StringBuffer target, int offset16, int char32) {
1079        int count = 1;
1080        char single = target.charAt(offset16);
1081
1082        if (isSurrogate(single)) {
1083            // pairs of the surrogate with offset16 at the lead char found
1084            if (isLeadSurrogate(single) && (target.length() > offset16 + 1)
1085                    && isTrailSurrogate(target.charAt(offset16 + 1))) {
1086                count++;
1087            } else {
1088                // pairs of the surrogate with offset16 at the trail char
1089                // found
1090                if (isTrailSurrogate(single) && (offset16 > 0)
1091                        && isLeadSurrogate(target.charAt(offset16 - 1))) {
1092                    offset16--;
1093                    count++;
1094                }
1095            }
1096        }
1097        target.replace(offset16, offset16 + count, valueOf(char32));
1098    }
1099
1100    /**
1101     * Set a code point into a UTF16 position in a char array. Adjusts target according if we are
1102     * replacing a non-supplementary codepoint with a supplementary and vice versa.
1103     *
1104     * @param target char array
1105     * @param limit numbers of valid chars in target, different from target.length. limit counts the
1106     *            number of chars in target that represents a string, not the size of array target.
1107     * @param offset16 UTF16 position to insert into
1108     * @param char32 code point
1109     * @return new number of chars in target that represents a string
1110     * @exception IndexOutOfBoundsException if offset16 is out of range
1111     */
1112    public static int setCharAt(char target[], int limit, int offset16, int char32) {
1113        if (offset16 >= limit) {
1114            throw new ArrayIndexOutOfBoundsException(offset16);
1115        }
1116        int count = 1;
1117        char single = target[offset16];
1118
1119        if (isSurrogate(single)) {
1120            // pairs of the surrogate with offset16 at the lead char found
1121            if (isLeadSurrogate(single) && (target.length > offset16 + 1)
1122                    && isTrailSurrogate(target[offset16 + 1])) {
1123                count++;
1124            } else {
1125                // pairs of the surrogate with offset16 at the trail char
1126                // found
1127                if (isTrailSurrogate(single) && (offset16 > 0)
1128                        && isLeadSurrogate(target[offset16 - 1])) {
1129                    offset16--;
1130                    count++;
1131                }
1132            }
1133        }
1134
1135        String str = valueOf(char32);
1136        int result = limit;
1137        int strlength = str.length();
1138        target[offset16] = str.charAt(0);
1139        if (count == strlength) {
1140            if (count == 2) {
1141                target[offset16 + 1] = str.charAt(1);
1142            }
1143        } else {
1144            // this is not exact match in space, we'll have to do some
1145            // shifting
1146            System.arraycopy(target, offset16 + count, target, offset16 + strlength, limit
1147                    - (offset16 + count));
1148            if (count < strlength) {
1149                // char32 is a supplementary character trying to squeeze into
1150                // a non-supplementary space
1151                target[offset16 + 1] = str.charAt(1);
1152                result++;
1153                if (result < target.length) {
1154                    target[result] = 0;
1155                }
1156            } else {
1157                // char32 is a non-supplementary character trying to fill
1158                // into a supplementary space
1159                result--;
1160                target[result] = 0;
1161            }
1162        }
1163        return result;
1164    }
1165
1166    /**
1167     * Shifts offset16 by the argument number of codepoints
1168     *
1169     * @param source string
1170     * @param offset16 UTF16 position to shift
1171     * @param shift32 number of codepoints to shift
1172     * @return new shifted offset16
1173     * @exception IndexOutOfBoundsException if the new offset16 is out of bounds.
1174     */
1175    public static int moveCodePointOffset(String source, int offset16, int shift32) {
1176        int result = offset16;
1177        int size = source.length();
1178        int count;
1179        char ch;
1180        if (offset16 < 0 || offset16 > size) {
1181            throw new StringIndexOutOfBoundsException(offset16);
1182        }
1183        if (shift32 > 0) {
1184            if (shift32 + offset16 > size) {
1185                throw new StringIndexOutOfBoundsException(offset16);
1186            }
1187            count = shift32;
1188            while (result < size && count > 0) {
1189                ch = source.charAt(result);
1190                if (isLeadSurrogate(ch) && ((result + 1) < size)
1191                        && isTrailSurrogate(source.charAt(result + 1))) {
1192                    result++;
1193                }
1194                count--;
1195                result++;
1196            }
1197        } else {
1198            if (offset16 + shift32 < 0) {
1199                throw new StringIndexOutOfBoundsException(offset16);
1200            }
1201            for (count = -shift32; count > 0; count--) {
1202                result--;
1203                if (result < 0) {
1204                    break;
1205                }
1206                ch = source.charAt(result);
1207                if (isTrailSurrogate(ch) && result > 0
1208                        && isLeadSurrogate(source.charAt(result - 1))) {
1209                    result--;
1210                }
1211            }
1212        }
1213        if (count != 0) {
1214            throw new StringIndexOutOfBoundsException(shift32);
1215        }
1216        return result;
1217    }
1218
1219    /**
1220     * Shifts offset16 by the argument number of codepoints
1221     *
1222     * @param source String buffer
1223     * @param offset16 UTF16 position to shift
1224     * @param shift32 Number of codepoints to shift
1225     * @return new shifted offset16
1226     * @exception IndexOutOfBoundsException If the new offset16 is out of bounds.
1227     */
1228    public static int moveCodePointOffset(StringBuffer source, int offset16, int shift32) {
1229        int result = offset16;
1230        int size = source.length();
1231        int count;
1232        char ch;
1233        if (offset16 < 0 || offset16 > size) {
1234            throw new StringIndexOutOfBoundsException(offset16);
1235        }
1236        if (shift32 > 0) {
1237            if (shift32 + offset16 > size) {
1238                throw new StringIndexOutOfBoundsException(offset16);
1239            }
1240            count = shift32;
1241            while (result < size && count > 0) {
1242                ch = source.charAt(result);
1243                if (isLeadSurrogate(ch) && ((result + 1) < size)
1244                        && isTrailSurrogate(source.charAt(result + 1))) {
1245                    result++;
1246                }
1247                count--;
1248                result++;
1249            }
1250        } else {
1251            if (offset16 + shift32 < 0) {
1252                throw new StringIndexOutOfBoundsException(offset16);
1253            }
1254            for (count = -shift32; count > 0; count--) {
1255                result--;
1256                if (result < 0) {
1257                    break;
1258                }
1259                ch = source.charAt(result);
1260                if (isTrailSurrogate(ch) && result > 0
1261                        && isLeadSurrogate(source.charAt(result - 1))) {
1262                    result--;
1263                }
1264            }
1265        }
1266        if (count != 0) {
1267            throw new StringIndexOutOfBoundsException(shift32);
1268        }
1269        return result;
1270    }
1271
1272    /**
1273     * Shifts offset16 by the argument number of codepoints within a subarray.
1274     *
1275     * @param source Char array
1276     * @param start Position of the subarray to be performed on
1277     * @param limit Position of the subarray to be performed on
1278     * @param offset16 UTF16 position to shift relative to start
1279     * @param shift32 Number of codepoints to shift
1280     * @return new shifted offset16 relative to start
1281     * @exception IndexOutOfBoundsException If the new offset16 is out of bounds with respect to the subarray or the
1282     *                subarray bounds are out of range.
1283     */
1284    public static int moveCodePointOffset(char source[], int start, int limit, int offset16,
1285            int shift32) {
1286        int size = source.length;
1287        int count;
1288        char ch;
1289        int result = offset16 + start;
1290        if (start < 0 || limit < start) {
1291            throw new StringIndexOutOfBoundsException(start);
1292        }
1293        if (limit > size) {
1294            throw new StringIndexOutOfBoundsException(limit);
1295        }
1296        if (offset16 < 0 || result > limit) {
1297            throw new StringIndexOutOfBoundsException(offset16);
1298        }
1299        if (shift32 > 0) {
1300            if (shift32 + result > size) {
1301                throw new StringIndexOutOfBoundsException(result);
1302            }
1303            count = shift32;
1304            while (result < limit && count > 0) {
1305                ch = source[result];
1306                if (isLeadSurrogate(ch) && (result + 1 < limit)
1307                        && isTrailSurrogate(source[result + 1])) {
1308                    result++;
1309                }
1310                count--;
1311                result++;
1312            }
1313        } else {
1314            if (result + shift32 < start) {
1315                throw new StringIndexOutOfBoundsException(result);
1316            }
1317            for (count = -shift32; count > 0; count--) {
1318                result--;
1319                if (result < start) {
1320                    break;
1321                }
1322                ch = source[result];
1323                if (isTrailSurrogate(ch) && result > start && isLeadSurrogate(source[result - 1])) {
1324                    result--;
1325                }
1326            }
1327        }
1328        if (count != 0) {
1329            throw new StringIndexOutOfBoundsException(shift32);
1330        }
1331        result -= start;
1332        return result;
1333    }
1334
1335    /**
1336     * Inserts char32 codepoint into target at the argument offset16. If the offset16 is in the
1337     * middle of a supplementary codepoint, char32 will be inserted after the supplementary
1338     * codepoint. The length of target increases by one if codepoint is non-supplementary, 2
1339     * otherwise.
1340     * <p>
1341     * The overall effect is exactly as if the argument were converted to a string by the method
1342     * valueOf(char) and the characters in that string were then inserted into target at the
1343     * position indicated by offset16.
1344     * </p>
1345     * <p>
1346     * The offset argument must be greater than or equal to 0, and less than or equal to the length
1347     * of source.
1348     *
1349     * @param target String buffer to insert to
1350     * @param offset16 Offset which char32 will be inserted in
1351     * @param char32 Codepoint to be inserted
1352     * @return a reference to target
1353     * @exception IndexOutOfBoundsException Thrown if offset16 is invalid.
1354     */
1355    public static StringBuffer insert(StringBuffer target, int offset16, int char32) {
1356        String str = valueOf(char32);
1357        if (offset16 != target.length() && bounds(target, offset16) == TRAIL_SURROGATE_BOUNDARY) {
1358            offset16++;
1359        }
1360        target.insert(offset16, str);
1361        return target;
1362    }
1363
1364    /**
1365     * Inserts char32 codepoint into target at the argument offset16. If the offset16 is in the
1366     * middle of a supplementary codepoint, char32 will be inserted after the supplementary
1367     * codepoint. Limit increases by one if codepoint is non-supplementary, 2 otherwise.
1368     * <p>
1369     * The overall effect is exactly as if the argument were converted to a string by the method
1370     * valueOf(char) and the characters in that string were then inserted into target at the
1371     * position indicated by offset16.
1372     * </p>
1373     * <p>
1374     * The offset argument must be greater than or equal to 0, and less than or equal to the limit.
1375     *
1376     * @param target Char array to insert to
1377     * @param limit End index of the char array, limit &lt;= target.length
1378     * @param offset16 Offset which char32 will be inserted in
1379     * @param char32 Codepoint to be inserted
1380     * @return new limit size
1381     * @exception IndexOutOfBoundsException Thrown if offset16 is invalid.
1382     */
1383    public static int insert(char target[], int limit, int offset16, int char32) {
1384        String str = valueOf(char32);
1385        if (offset16 != limit && bounds(target, 0, limit, offset16) == TRAIL_SURROGATE_BOUNDARY) {
1386            offset16++;
1387        }
1388        int size = str.length();
1389        if (limit + size > target.length) {
1390            throw new ArrayIndexOutOfBoundsException(offset16 + size);
1391        }
1392        System.arraycopy(target, offset16, target, offset16 + size, limit - offset16);
1393        target[offset16] = str.charAt(0);
1394        if (size == 2) {
1395            target[offset16 + 1] = str.charAt(1);
1396        }
1397        return limit + size;
1398    }
1399
1400    /**
1401     * Removes the codepoint at the specified position in this target (shortening target by 1
1402     * character if the codepoint is a non-supplementary, 2 otherwise).
1403     *
1404     * @param target String buffer to remove codepoint from
1405     * @param offset16 Offset which the codepoint will be removed
1406     * @return a reference to target
1407     * @exception IndexOutOfBoundsException Thrown if offset16 is invalid.
1408     */
1409    public static StringBuffer delete(StringBuffer target, int offset16) {
1410        int count = 1;
1411        switch (bounds(target, offset16)) {
1412        case LEAD_SURROGATE_BOUNDARY:
1413            count++;
1414            break;
1415        case TRAIL_SURROGATE_BOUNDARY:
1416            count++;
1417            offset16--;
1418            break;
1419        }
1420        target.delete(offset16, offset16 + count);
1421        return target;
1422    }
1423
1424    /**
1425     * Removes the codepoint at the specified position in this target (shortening target by 1
1426     * character if the codepoint is a non-supplementary, 2 otherwise).
1427     *
1428     * @param target String buffer to remove codepoint from
1429     * @param limit End index of the char array, limit &lt;= target.length
1430     * @param offset16 Offset which the codepoint will be removed
1431     * @return a new limit size
1432     * @exception IndexOutOfBoundsException Thrown if offset16 is invalid.
1433     */
1434    public static int delete(char target[], int limit, int offset16) {
1435        int count = 1;
1436        switch (bounds(target, 0, limit, offset16)) {
1437        case LEAD_SURROGATE_BOUNDARY:
1438            count++;
1439            break;
1440        case TRAIL_SURROGATE_BOUNDARY:
1441            count++;
1442            offset16--;
1443            break;
1444        }
1445        System.arraycopy(target, offset16 + count, target, offset16, limit - (offset16 + count));
1446        target[limit - count] = 0;
1447        return limit - count;
1448    }
1449
1450    /**
1451     * Returns the index within the argument UTF16 format Unicode string of the first occurrence of
1452     * the argument codepoint. I.e., the smallest index <code>i</code> such that
1453     * <code>UTF16.charAt(source, i) ==
1454     * char32</code> is true.
1455     * <p>
1456     * If no such character occurs in this string, then -1 is returned.
1457     * </p>
1458     * <p>
1459     * Examples:<br>
1460     * UTF16.indexOf("abc", 'a') returns 0<br>
1461     * UTF16.indexOf("abc\ud800\udc00", 0x10000) returns 3<br>
1462     * UTF16.indexOf("abc\ud800\udc00", 0xd800) returns -1<br>
1463     * </p>
1464     * Note this method is provided as support to jdk 1.3, which does not support supplementary
1465     * characters to its fullest.
1466     *
1467     * @param source UTF16 format Unicode string that will be searched
1468     * @param char32 Codepoint to search for
1469     * @return the index of the first occurrence of the codepoint in the argument Unicode string, or
1470     *         -1 if the codepoint does not occur.
1471     */
1472    public static int indexOf(String source, int char32) {
1473        if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
1474            throw new IllegalArgumentException("Argument char32 is not a valid codepoint");
1475        }
1476        // non-surrogate bmp
1477        if (char32 < LEAD_SURROGATE_MIN_VALUE
1478                || (char32 > TRAIL_SURROGATE_MAX_VALUE && char32 < SUPPLEMENTARY_MIN_VALUE)) {
1479            return source.indexOf((char) char32);
1480        }
1481        // surrogate
1482        if (char32 < SUPPLEMENTARY_MIN_VALUE) {
1483            int result = source.indexOf((char) char32);
1484            if (result >= 0) {
1485                if (isLeadSurrogate((char) char32) && (result < source.length() - 1)
1486                        && isTrailSurrogate(source.charAt(result + 1))) {
1487                    return indexOf(source, char32, result + 1);
1488                }
1489                // trail surrogate
1490                if (result > 0 && isLeadSurrogate(source.charAt(result - 1))) {
1491                    return indexOf(source, char32, result + 1);
1492                }
1493            }
1494            return result;
1495        }
1496        // supplementary
1497        String char32str = toString(char32);
1498        return source.indexOf(char32str);
1499    }
1500
1501    /**
1502     * Returns the index within the argument UTF16 format Unicode string of the first occurrence of
     * the argument string str. This method is implemented based on codepoints, hence a "lead
     * surrogate character + trail surrogate character" is treated as one entity. Hence if str
     * starts with a trail surrogate character, an occurrence of str in source that is immediately
     * preceded by a lead surrogate character is not a valid match. Vice versa for a lead
     * surrogate that ends str. See the examples below.
1508     * <p>
1509     * If no such string str occurs in this source, then -1 is returned.
1510     * </p>
1511     * <p>
1512     * Examples:<br>
1513     * UTF16.indexOf("abc", "ab") returns 0<br>
1514     * UTF16.indexOf("abc\ud800\udc00", "\ud800\udc00") returns 3<br>
1515     * UTF16.indexOf("abc\ud800\udc00", "\ud800") returns -1<br>
1516     * </p>
1517     * Note this method is provided as support to jdk 1.3, which does not support supplementary
1518     * characters to its fullest.
1519     *
1520     * @param source UTF16 format Unicode string that will be searched
1521     * @param str UTF16 format Unicode string to search for
     * @return the index of the first occurrence of str in the argument Unicode string, or
     *         -1 if str does not occur.
1524     */
1525    public static int indexOf(String source, String str) {
1526        int strLength = str.length();
1527        // non-surrogate ends
1528        if (!isTrailSurrogate(str.charAt(0)) && !isLeadSurrogate(str.charAt(strLength - 1))) {
1529            return source.indexOf(str);
1530        }
1531
1532        int result = source.indexOf(str);
1533        int resultEnd = result + strLength;
1534        if (result >= 0) {
1535            // check last character
1536            if (isLeadSurrogate(str.charAt(strLength - 1)) && (result < source.length() - 1)
1537                    && isTrailSurrogate(source.charAt(resultEnd + 1))) {
1538                return indexOf(source, str, resultEnd + 1);
1539            }
1540            // check first character which is a trail surrogate
1541            if (isTrailSurrogate(str.charAt(0)) && result > 0
1542                    && isLeadSurrogate(source.charAt(result - 1))) {
1543                return indexOf(source, str, resultEnd + 1);
1544            }
1545        }
1546        return result;
1547    }
1548
1549    /**
1550     * Returns the index within the argument UTF16 format Unicode string of the first occurrence of
1551     * the argument codepoint. I.e., the smallest index i such that: <br>
1552     * (UTF16.charAt(source, i) == char32 &amp;&amp; i &gt;= fromIndex) is true.
1553     * <p>
1554     * If no such character occurs in this string, then -1 is returned.
1555     * </p>
1556     * <p>
1557     * Examples:<br>
1558     * UTF16.indexOf("abc", 'a', 1) returns -1<br>
1559     * UTF16.indexOf("abc\ud800\udc00", 0x10000, 1) returns 3<br>
1560     * UTF16.indexOf("abc\ud800\udc00", 0xd800, 1) returns -1<br>
1561     * </p>
1562     * Note this method is provided as support to jdk 1.3, which does not support supplementary
1563     * characters to its fullest.
1564     *
1565     * @param source UTF16 format Unicode string that will be searched
1566     * @param char32 Codepoint to search for
1567     * @param fromIndex The index to start the search from.
1568     * @return the index of the first occurrence of the codepoint in the argument Unicode string at
1569     *         or after fromIndex, or -1 if the codepoint does not occur.
1570     */
1571    public static int indexOf(String source, int char32, int fromIndex) {
1572        if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
1573            throw new IllegalArgumentException("Argument char32 is not a valid codepoint");
1574        }
1575        // non-surrogate bmp
1576        if (char32 < LEAD_SURROGATE_MIN_VALUE
1577                || (char32 > TRAIL_SURROGATE_MAX_VALUE && char32 < SUPPLEMENTARY_MIN_VALUE)) {
1578            return source.indexOf((char) char32, fromIndex);
1579        }
1580        // surrogate
1581        if (char32 < SUPPLEMENTARY_MIN_VALUE) {
1582            int result = source.indexOf((char) char32, fromIndex);
1583            if (result >= 0) {
1584                if (isLeadSurrogate((char) char32) && (result < source.length() - 1)
1585                        && isTrailSurrogate(source.charAt(result + 1))) {
1586                    return indexOf(source, char32, result + 1);
1587                }
1588                // trail surrogate
1589                if (result > 0 && isLeadSurrogate(source.charAt(result - 1))) {
1590                    return indexOf(source, char32, result + 1);
1591                }
1592            }
1593            return result;
1594        }
1595        // supplementary
1596        String char32str = toString(char32);
1597        return source.indexOf(char32str, fromIndex);
1598    }
1599
1600    /**
1601     * Returns the index within the argument UTF16 format Unicode string of the first occurrence of
     * the argument string str. This method is implemented based on codepoints, hence a "lead
     * surrogate character + trail surrogate character" is treated as one entity. Hence if str
     * starts with a trail surrogate character, an occurrence of str in source that is immediately
     * preceded by a lead surrogate character is not a valid match. Vice versa for a lead
     * surrogate that ends str. See the examples below.
1607     * <p>
1608     * If no such string str occurs in this source, then -1 is returned.
1609     * </p>
1610     * <p>
1611     * Examples:<br>
1612     * UTF16.indexOf("abc", "ab", 0) returns 0<br>
1613     * UTF16.indexOf("abc\ud800\udc00", "\ud800\udc00", 0) returns 3<br>
1614     * UTF16.indexOf("abc\ud800\udc00", "\ud800\udc00", 2) returns 3<br>
1615     * UTF16.indexOf("abc\ud800\udc00", "\ud800", 0) returns -1<br>
1616     * </p>
1617     * Note this method is provided as support to jdk 1.3, which does not support supplementary
1618     * characters to its fullest.
1619     *
1620     * @param source UTF16 format Unicode string that will be searched
1621     * @param str UTF16 format Unicode string to search for
1622     * @param fromIndex The index to start the search from.
     * @return the index of the first occurrence of str in the argument Unicode string at or
     *         after fromIndex, or -1 if str does not occur.
1625     */
1626    public static int indexOf(String source, String str, int fromIndex) {
1627        int strLength = str.length();
1628        // non-surrogate ends
1629        if (!isTrailSurrogate(str.charAt(0)) && !isLeadSurrogate(str.charAt(strLength - 1))) {
1630            return source.indexOf(str, fromIndex);
1631        }
1632
1633        int result = source.indexOf(str, fromIndex);
1634        int resultEnd = result + strLength;
1635        if (result >= 0) {
1636            // check last character
1637            if (isLeadSurrogate(str.charAt(strLength - 1)) && (result < source.length() - 1)
1638                    && isTrailSurrogate(source.charAt(resultEnd))) {
1639                return indexOf(source, str, resultEnd + 1);
1640            }
1641            // check first character which is a trail surrogate
1642            if (isTrailSurrogate(str.charAt(0)) && result > 0
1643                    && isLeadSurrogate(source.charAt(result - 1))) {
1644                return indexOf(source, str, resultEnd + 1);
1645            }
1646        }
1647        return result;
1648    }
1649
1650    /**
1651     * Returns the index within the argument UTF16 format Unicode string of the last occurrence of
1652     * the argument codepoint. I.e., the index returned is the largest value i such that:
1653     * UTF16.charAt(source, i) == char32 is true.
1654     * <p>
1655     * Examples:<br>
1656     * UTF16.lastIndexOf("abc", 'a') returns 0<br>
1657     * UTF16.lastIndexOf("abc\ud800\udc00", 0x10000) returns 3<br>
1658     * UTF16.lastIndexOf("abc\ud800\udc00", 0xd800) returns -1<br>
1659     * </p>
1660     * <p>
1661     * source is searched backwards starting at the last character.
1662     * </p>
1663     * Note this method is provided as support to jdk 1.3, which does not support supplementary
1664     * characters to its fullest.
1665     *
1666     * @param source UTF16 format Unicode string that will be searched
1667     * @param char32 Codepoint to search for
1668     * @return the index of the last occurrence of the codepoint in source, or -1 if the codepoint
1669     *         does not occur.
1670     */
1671    public static int lastIndexOf(String source, int char32) {
1672        if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
1673            throw new IllegalArgumentException("Argument char32 is not a valid codepoint");
1674        }
1675        // non-surrogate bmp
1676        if (char32 < LEAD_SURROGATE_MIN_VALUE
1677                || (char32 > TRAIL_SURROGATE_MAX_VALUE && char32 < SUPPLEMENTARY_MIN_VALUE)) {
1678            return source.lastIndexOf((char) char32);
1679        }
1680        // surrogate
1681        if (char32 < SUPPLEMENTARY_MIN_VALUE) {
1682            int result = source.lastIndexOf((char) char32);
1683            if (result >= 0) {
1684                if (isLeadSurrogate((char) char32) && (result < source.length() - 1)
1685                        && isTrailSurrogate(source.charAt(result + 1))) {
1686                    return lastIndexOf(source, char32, result - 1);
1687                }
1688                // trail surrogate
1689                if (result > 0 && isLeadSurrogate(source.charAt(result - 1))) {
1690                    return lastIndexOf(source, char32, result - 1);
1691                }
1692            }
1693            return result;
1694        }
1695        // supplementary
1696        String char32str = toString(char32);
1697        return source.lastIndexOf(char32str);
1698    }
1699
1700    /**
1701     * Returns the index within the argument UTF16 format Unicode string of the last occurrence of
     * the argument string str. This method is implemented based on codepoints, hence a "lead
     * surrogate character + trail surrogate character" is treated as one entity. Hence if str
     * starts with a trail surrogate character, an occurrence of str in source that is immediately
     * preceded by a lead surrogate character is not a valid match. Vice versa for a lead
     * surrogate that ends str. See the examples below.
1707     * <p>
1708     * Examples:<br>
1709     * UTF16.lastIndexOf("abc", "a") returns 0<br>
1710     * UTF16.lastIndexOf("abc\ud800\udc00", "\ud800\udc00") returns 3<br>
1711     * UTF16.lastIndexOf("abc\ud800\udc00", "\ud800") returns -1<br>
1712     * </p>
1713     * <p>
1714     * source is searched backwards starting at the last character.
1715     * </p>
1716     * Note this method is provided as support to jdk 1.3, which does not support supplementary
1717     * characters to its fullest.
1718     *
1719     * @param source UTF16 format Unicode string that will be searched
1720     * @param str UTF16 format Unicode string to search for
     * @return the index of the last occurrence of str in source, or -1 if str does not occur.
1723     */
1724    public static int lastIndexOf(String source, String str) {
1725        int strLength = str.length();
1726        // non-surrogate ends
1727        if (!isTrailSurrogate(str.charAt(0)) && !isLeadSurrogate(str.charAt(strLength - 1))) {
1728            return source.lastIndexOf(str);
1729        }
1730
1731        int result = source.lastIndexOf(str);
1732        if (result >= 0) {
1733            // check last character
1734            if (isLeadSurrogate(str.charAt(strLength - 1)) && (result < source.length() - 1)
1735                    && isTrailSurrogate(source.charAt(result + strLength + 1))) {
1736                return lastIndexOf(source, str, result - 1);
1737            }
1738            // check first character which is a trail surrogate
1739            if (isTrailSurrogate(str.charAt(0)) && result > 0
1740                    && isLeadSurrogate(source.charAt(result - 1))) {
1741                return lastIndexOf(source, str, result - 1);
1742            }
1743        }
1744        return result;
1745    }
1746
1747    /**
1748     * <p>
1749     * Returns the index within the argument UTF16 format Unicode string of the last occurrence of
1750     * the argument codepoint, where the result is less than or equals to fromIndex.
1751     * </p>
1752     * <p>
1753     * This method is implemented based on codepoints, hence a single surrogate character will not
1754     * match a supplementary character.
1755     * </p>
1756     * <p>
1757     * source is searched backwards starting at the last character starting at the specified index.
1758     * </p>
1759     * <p>
1760     * Examples:<br>
1761     * UTF16.lastIndexOf("abc", 'c', 2) returns 2<br>
1762     * UTF16.lastIndexOf("abc", 'c', 1) returns -1<br>
1763     * UTF16.lastIndexOf("abc\ud800\udc00", 0x10000, 5) returns 3<br>
1764     * UTF16.lastIndexOf("abc\ud800\udc00", 0x10000, 3) returns 3<br>
     * UTF16.lastIndexOf("abc\ud800\udc00", 0xd800, 4) returns -1<br>
1766     * </p>
1767     * Note this method is provided as support to jdk 1.3, which does not support supplementary
1768     * characters to its fullest.
1769     *
1770     * @param source UTF16 format Unicode string that will be searched
1771     * @param char32 Codepoint to search for
1772     * @param fromIndex the index to start the search from. There is no restriction on the value of
1773     *            fromIndex. If it is greater than or equal to the length of this string, it has the
1774     *            same effect as if it were equal to one less than the length of this string: this
1775     *            entire string may be searched. If it is negative, it has the same effect as if it
1776     *            were -1: -1 is returned.
1777     * @return the index of the last occurrence of the codepoint in source, or -1 if the codepoint
1778     *         does not occur.
1779     */
1780    public static int lastIndexOf(String source, int char32, int fromIndex) {
1781        if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) {
1782            throw new IllegalArgumentException("Argument char32 is not a valid codepoint");
1783        }
1784        // non-surrogate bmp
1785        if (char32 < LEAD_SURROGATE_MIN_VALUE
1786                || (char32 > TRAIL_SURROGATE_MAX_VALUE && char32 < SUPPLEMENTARY_MIN_VALUE)) {
1787            return source.lastIndexOf((char) char32, fromIndex);
1788        }
1789        // surrogate
1790        if (char32 < SUPPLEMENTARY_MIN_VALUE) {
1791            int result = source.lastIndexOf((char) char32, fromIndex);
1792            if (result >= 0) {
1793                if (isLeadSurrogate((char) char32) && (result < source.length() - 1)
1794                        && isTrailSurrogate(source.charAt(result + 1))) {
1795                    return lastIndexOf(source, char32, result - 1);
1796                }
1797                // trail surrogate
1798                if (result > 0 && isLeadSurrogate(source.charAt(result - 1))) {
1799                    return lastIndexOf(source, char32, result - 1);
1800                }
1801            }
1802            return result;
1803        }
1804        // supplementary
1805        String char32str = toString(char32);
1806        return source.lastIndexOf(char32str, fromIndex);
1807    }
1808
1809    /**
1810     * <p>
1811     * Returns the index within the argument UTF16 format Unicode string of the last occurrence of
1812     * the argument string str, where the result is less than or equals to fromIndex.
1813     * </p>
1814     * <p>
1815     * This method is implemented based on codepoints, hence a "lead surrogate character + trail
     * surrogate character" is treated as one entity. Hence if str starts with a trail surrogate
     * character, an occurrence of str in source that is immediately preceded by a lead surrogate
     * character is not a valid match. Vice versa for a lead surrogate that ends str.
1819     * </p>
1820     * See example below.
1821     * <p>
1822     * Examples:<br>
1823     * UTF16.lastIndexOf("abc", "c", 2) returns 2<br>
1824     * UTF16.lastIndexOf("abc", "c", 1) returns -1<br>
1825     * UTF16.lastIndexOf("abc\ud800\udc00", "\ud800\udc00", 5) returns 3<br>
1826     * UTF16.lastIndexOf("abc\ud800\udc00", "\ud800\udc00", 3) returns 3<br>
1827     * UTF16.lastIndexOf("abc\ud800\udc00", "\ud800", 4) returns -1<br>
1828     * </p>
1829     * <p>
1830     * source is searched backwards starting at the last character.
1831     * </p>
1832     * Note this method is provided as support to jdk 1.3, which does not support supplementary
1833     * characters to its fullest.
1834     *
1835     * @param source UTF16 format Unicode string that will be searched
1836     * @param str UTF16 format Unicode string to search for
1837     * @param fromIndex the index to start the search from. There is no restriction on the value of
1838     *            fromIndex. If it is greater than or equal to the length of this string, it has the
1839     *            same effect as if it were equal to one less than the length of this string: this
1840     *            entire string may be searched. If it is negative, it has the same effect as if it
1841     *            were -1: -1 is returned.
     * @return the index of the last occurrence of str in source that starts at or before
     *         fromIndex, or -1 if str does not occur.
1844     */
1845    public static int lastIndexOf(String source, String str, int fromIndex) {
1846        int strLength = str.length();
1847        // non-surrogate ends
1848        if (!isTrailSurrogate(str.charAt(0)) && !isLeadSurrogate(str.charAt(strLength - 1))) {
1849            return source.lastIndexOf(str, fromIndex);
1850        }
1851
1852        int result = source.lastIndexOf(str, fromIndex);
1853        if (result >= 0) {
1854            // check last character
1855            if (isLeadSurrogate(str.charAt(strLength - 1)) && (result < source.length() - 1)
1856                    && isTrailSurrogate(source.charAt(result + strLength))) {
1857                return lastIndexOf(source, str, result - 1);
1858            }
1859            // check first character which is a trail surrogate
1860            if (isTrailSurrogate(str.charAt(0)) && result > 0
1861                    && isLeadSurrogate(source.charAt(result - 1))) {
1862                return lastIndexOf(source, str, result - 1);
1863            }
1864        }
1865        return result;
1866    }
1867
1868    /**
1869     * Returns a new UTF16 format Unicode string resulting from replacing all occurrences of
1870     * oldChar32 in source with newChar32. If the character oldChar32 does not occur in the UTF16
1871     * format Unicode string source, then source will be returned. Otherwise, a new String object is
1872     * created that represents a codepoint sequence identical to the codepoint sequence represented
1873     * by source, except that every occurrence of oldChar32 is replaced by an occurrence of
1874     * newChar32.
1875     * <p>
1876     * Examples: <br>
1877     * UTF16.replace("mesquite in your cellar", 'e', 'o');<br>
1878     * returns "mosquito in your collar"<br>
1879     * UTF16.replace("JonL", 'q', 'x');<br>
1880     * returns "JonL" (no change)<br>
1881     * UTF16.replace("Supplementary character \ud800\udc00", 0x10000, '!'); <br>
1882     * returns "Supplementary character !"<br>
1883     * UTF16.replace("Supplementary character \ud800\udc00", 0xd800, '!'); <br>
1884     * returns "Supplementary character \ud800\udc00"<br>
1885     * </p>
1886     * Note this method is provided as support to jdk 1.3, which does not support supplementary
1887     * characters to its fullest.
1888     *
1889     * @param source UTF16 format Unicode string which the codepoint replacements will be based on.
1890     * @param oldChar32 Non-zero old codepoint to be replaced.
1891     * @param newChar32 The new codepoint to replace oldChar32
1892     * @return new String derived from source by replacing every occurrence of oldChar32 with
1893     *         newChar32, unless when no oldChar32 is found in source then source will be returned.
1894     */
1895    public static String replace(String source, int oldChar32, int newChar32) {
1896        if (oldChar32 <= 0 || oldChar32 > CODEPOINT_MAX_VALUE) {
1897            throw new IllegalArgumentException("Argument oldChar32 is not a valid codepoint");
1898        }
1899        if (newChar32 <= 0 || newChar32 > CODEPOINT_MAX_VALUE) {
1900            throw new IllegalArgumentException("Argument newChar32 is not a valid codepoint");
1901        }
1902
1903        int index = indexOf(source, oldChar32);
1904        if (index == -1) {
1905            return source;
1906        }
1907        String newChar32Str = toString(newChar32);
1908        int oldChar32Size = 1;
1909        int newChar32Size = newChar32Str.length();
1910        StringBuffer result = new StringBuffer(source);
1911        int resultIndex = index;
1912
1913        if (oldChar32 >= SUPPLEMENTARY_MIN_VALUE) {
1914            oldChar32Size = 2;
1915        }
1916
1917        while (index != -1) {
1918            int endResultIndex = resultIndex + oldChar32Size;
1919            result.replace(resultIndex, endResultIndex, newChar32Str);
1920            int lastEndIndex = index + oldChar32Size;
1921            index = indexOf(source, oldChar32, lastEndIndex);
1922            resultIndex += newChar32Size + index - lastEndIndex;
1923        }
1924        return result.toString();
1925    }
1926
1927    /**
1928     * Returns a new UTF16 format Unicode string resulting from replacing all occurrences of oldStr
1929     * in source with newStr. If the string oldStr does not occur in the UTF16 format Unicode string
1930     * source, then source will be returned. Otherwise, a new String object is created that
1931     * represents a codepoint sequence identical to the codepoint sequence represented by source,
1932     * except that every occurrence of oldStr is replaced by an occurrence of newStr.
1933     * <p>
1934     * Examples: <br>
1935     * UTF16.replace("mesquite in your cellar", "e", "o");<br>
1936     * returns "mosquito in your collar"<br>
1937     * UTF16.replace("mesquite in your cellar", "mesquite", "cat");<br>
1938     * returns "cat in your cellar"<br>
1939     * UTF16.replace("JonL", "q", "x");<br>
1940     * returns "JonL" (no change)<br>
     * UTF16.replace("Supplementary character \ud800\udc00", "\ud800\udc00", "!"); <br>
     * returns "Supplementary character !"<br>
     * UTF16.replace("Supplementary character \ud800\udc00", "\ud800", "!"); <br>
     * returns "Supplementary character \ud800\udc00"<br>
1945     * </p>
1946     * Note this method is provided as support to jdk 1.3, which does not support supplementary
1947     * characters to its fullest.
1948     *
1949     * @param source UTF16 format Unicode string which the replacements will be based on.
1950     * @param oldStr Non-zero-length string to be replaced.
1951     * @param newStr The new string to replace oldStr
1952     * @return new String derived from source by replacing every occurrence of oldStr with newStr.
1953     *         When no oldStr is found in source, then source will be returned.
1954     */
1955    public static String replace(String source, String oldStr, String newStr) {
1956        int index = indexOf(source, oldStr);
1957        if (index == -1) {
1958            return source;
1959        }
1960        int oldStrSize = oldStr.length();
1961        int newStrSize = newStr.length();
1962        StringBuffer result = new StringBuffer(source);
1963        int resultIndex = index;
1964
1965        while (index != -1) {
1966            int endResultIndex = resultIndex + oldStrSize;
1967            result.replace(resultIndex, endResultIndex, newStr);
1968            int lastEndIndex = index + oldStrSize;
1969            index = indexOf(source, oldStr, lastEndIndex);
1970            resultIndex += newStrSize + index - lastEndIndex;
1971        }
1972        return result.toString();
1973    }
1974
1975    /**
     * Returns a new StringBuffer holding the reverse of a UTF16 format Unicode string; source
     * itself is left unchanged. Surrogate pairs are kept in lead-trail order instead of being
     * blindly reversed char by char.
1978     * <p>
1979     * Examples:<br>
1980     * UTF16.reverse(new StringBuffer( "Supplementary characters \ud800\udc00\ud801\udc01"))<br>
1981     * returns "\ud801\udc01\ud800\udc00 sretcarahc yratnemelppuS".
1982     *
1983     * @param source The source StringBuffer that contains UTF16 format Unicode string to be reversed
     * @return a new StringBuffer containing the reversed UTF16 format Unicode string.
1985     */
1986    public static StringBuffer reverse(StringBuffer source) {
1987        int length = source.length();
1988        StringBuffer result = new StringBuffer(length);
1989        for (int i = length; i-- > 0;) {
1990            char ch = source.charAt(i);
1991            if (isTrailSurrogate(ch) && i > 0) {
1992                char ch2 = source.charAt(i - 1);
1993                if (isLeadSurrogate(ch2)) {
1994                    result.append(ch2);
1995                    result.append(ch);
1996                    --i;
1997                    continue;
1998                }
1999            }
2000            result.append(ch);
2001        }
2002        return result;
2003    }
2004
2005    /**
2006     * Check if the string contains more Unicode code points than a certain number. This is more
2007     * efficient than counting all code points in the entire string and comparing that number with a
2008     * threshold. This function may not need to scan the string at all if the length is within a
2009     * certain range, and never needs to count more than 'number + 1' code points. Logically
2010     * equivalent to (countCodePoint(s) &gt; number). A Unicode code point may occupy either one or two
2011     * code units.
2012     *
2013     * @param source The input string.
2014     * @param number The number of code points in the string is compared against the 'number'
2015     *            parameter.
2016     * @return boolean value for whether the string contains more Unicode code points than 'number'.
2017     */
2018    public static boolean hasMoreCodePointsThan(String source, int number) {
2019        if (number < 0) {
2020            return true;
2021        }
2022        if (source == null) {
2023            return false;
2024        }
2025        int length = source.length();
2026
2027        // length >= 0 known
2028        // source contains at least (length + 1) / 2 code points: <= 2
2029        // chars per cp
2030        if (((length + 1) >> 1) > number) {
2031            return true;
2032        }
2033
2034        // check if source does not even contain enough chars
2035        int maxsupplementary = length - number;
2036        if (maxsupplementary <= 0) {
2037            return false;
2038        }
2039
2040        // there are maxsupplementary = length - number more chars than
2041        // asked-for code points
2042
2043        // count code points until they exceed and also check that there are
2044        // no more than maxsupplementary supplementary code points (char pairs)
2045        int start = 0;
2046        while (true) {
2047            if (length == 0) {
2048                return false;
2049            }
2050            if (number == 0) {
2051                return true;
2052            }
2053            if (isLeadSurrogate(source.charAt(start++)) && start != length
2054                    && isTrailSurrogate(source.charAt(start))) {
2055                start++;
2056                if (--maxsupplementary <= 0) {
2057                    // too many pairs - too few code points
2058                    return false;
2059                }
2060            }
2061            --number;
2062        }
2063    }
2064
2065    /**
2066     * Check if the sub-range of char array, from argument start to limit, contains more Unicode
2067     * code points than a certain number. This is more efficient than counting all code points in
2068     * the entire char array range and comparing that number with a threshold. This function may not
2069     * need to scan the char array at all if start and limit is within a certain range, and never
2070     * needs to count more than 'number + 1' code points. Logically equivalent to
2071     * (countCodePoint(source, start, limit) &gt; number). A Unicode code point may occupy either one
2072     * or two code units.
2073     *
2074     * @param source Array of UTF-16 chars
     * @param start Offset of the first char of the range in the source array to analyze
     * @param limit Offset just past the last char of the range in the source array to analyze
2077     * @param number The number of code points in the string is compared against the 'number'
2078     *            parameter.
2079     * @return boolean value for whether the string contains more Unicode code points than 'number'.
     * @exception IndexOutOfBoundsException Thrown when start or limit is negative, or limit &lt; start
2081     */
2082    public static boolean hasMoreCodePointsThan(char source[], int start, int limit, int number) {
2083        int length = limit - start;
2084        if (length < 0 || start < 0 || limit < 0) {
2085            throw new IndexOutOfBoundsException(
2086                    "Start and limit indexes should be non-negative and start <= limit");
2087        }
2088        if (number < 0) {
2089            return true;
2090        }
2091        if (source == null) {
2092            return false;
2093        }
2094
2095        // length >= 0 known
2096        // source contains at least (length + 1) / 2 code points: <= 2
2097        // chars per cp
2098        if (((length + 1) >> 1) > number) {
2099            return true;
2100        }
2101
2102        // check if source does not even contain enough chars
2103        int maxsupplementary = length - number;
2104        if (maxsupplementary <= 0) {
2105            return false;
2106        }
2107
2108        // there are maxsupplementary = length - number more chars than
2109        // asked-for code points
2110
2111        // count code points until they exceed and also check that there are
2112        // no more than maxsupplementary supplementary code points (char pairs)
2113        while (true) {
2114            if (length == 0) {
2115                return false;
2116            }
2117            if (number == 0) {
2118                return true;
2119            }
2120            if (isLeadSurrogate(source[start++]) && start != limit
2121                    && isTrailSurrogate(source[start])) {
2122                start++;
2123                if (--maxsupplementary <= 0) {
2124                    // too many pairs - too few code points
2125                    return false;
2126                }
2127            }
2128            --number;
2129        }
2130    }
2131
2132    /**
2133     * Check if the string buffer contains more Unicode code points than a certain number. This is
2134     * more efficient than counting all code points in the entire string buffer and comparing that
2135     * number with a threshold. This function may not need to scan the string buffer at all if the
2136     * length is within a certain range, and never needs to count more than 'number + 1' code
2137     * points. Logically equivalent to (countCodePoint(s) &gt; number). A Unicode code point may
2138     * occupy either one or two code units.
2139     *
2140     * @param source The input string buffer.
2141     * @param number The number of code points in the string buffer is compared against the 'number'
2142     *            parameter.
2143     * @return boolean value for whether the string buffer contains more Unicode code points than
2144     *         'number'.
2145     */
2146    public static boolean hasMoreCodePointsThan(StringBuffer source, int number) {
2147        if (number < 0) {
2148            return true;
2149        }
2150        if (source == null) {
2151            return false;
2152        }
2153        int length = source.length();
2154
2155        // length >= 0 known
2156        // source contains at least (length + 1) / 2 code points: <= 2
2157        // chars per cp
2158        if (((length + 1) >> 1) > number) {
2159            return true;
2160        }
2161
2162        // check if source does not even contain enough chars
2163        int maxsupplementary = length - number;
2164        if (maxsupplementary <= 0) {
2165            return false;
2166        }
2167
2168        // there are maxsupplementary = length - number more chars than
2169        // asked-for code points
2170
2171        // count code points until they exceed and also check that there are
2172        // no more than maxsupplementary supplementary code points (char pairs)
2173        int start = 0;
2174        while (true) {
2175            if (length == 0) {
2176                return false;
2177            }
2178            if (number == 0) {
2179                return true;
2180            }
2181            if (isLeadSurrogate(source.charAt(start++)) && start != length
2182                    && isTrailSurrogate(source.charAt(start))) {
2183                start++;
2184                if (--maxsupplementary <= 0) {
2185                    // too many pairs - too few code points
2186                    return false;
2187                }
2188            }
2189            --number;
2190        }
2191    }
2192
2193    /**
2194     * Cover JDK 1.5 API. Create a String from an array of codePoints.
2195     *
2196     * @param codePoints The code array
2197     * @param offset The start of the text in the code point array
2198     * @param count The number of code points
2199     * @return a String representing the code points between offset and count
2200     * @throws IllegalArgumentException If an invalid code point is encountered
2201     * @throws IndexOutOfBoundsException If the offset or count are out of bounds.
2202     */
2203    public static String newString(int[] codePoints, int offset, int count) {
2204        if (count < 0) {
2205            throw new IllegalArgumentException();
2206        }
2207        char[] chars = new char[count];
2208        int w = 0;
2209        for (int r = offset, e = offset + count; r < e; ++r) {
2210            int cp = codePoints[r];
2211            if (cp < 0 || cp > 0x10ffff) {
2212                throw new IllegalArgumentException();
2213            }
2214            while (true) {
2215                try {
2216                    if (cp < 0x010000) {
2217                        chars[w] = (char) cp;
2218                        w++;
2219                    } else {
2220                        chars[w] = (char) (LEAD_SURROGATE_OFFSET_ + (cp >> LEAD_SURROGATE_SHIFT_));
2221                        chars[w + 1] = (char) (TRAIL_SURROGATE_MIN_VALUE + (cp & TRAIL_SURROGATE_MASK_));
2222                        w += 2;
2223                    }
2224                    break;
2225                } catch (IndexOutOfBoundsException ex) {
2226                    int newlen = (int) (Math.ceil((double) codePoints.length * (w + 2)
2227                            / (r - offset + 1)));
2228                    char[] temp = new char[newlen];
2229                    System.arraycopy(chars, 0, temp, 0, w);
2230                    chars = temp;
2231                }
2232            }
2233        }
2234        return new String(chars, 0, w);
2235    }
2236
2237    /**
2238     * <p>
2239     * UTF16 string comparator class. Allows UTF16 string comparison to be done with the various
2240     * modes
2241     * </p>
2242     * <ul>
2243     * <li> Code point comparison or code unit comparison
2244     * <li> Case sensitive comparison, case insensitive comparison or case insensitive comparison
2245     * with special handling for character 'i'.
2246     * </ul>
2247     * <p>
2248     * The code unit or code point comparison differ only when comparing supplementary code points
2249     * (&#92;u10000..&#92;u10ffff) to BMP code points near the end of the BMP (i.e.,
2250     * &#92;ue000..&#92;uffff). In code unit comparison, high BMP code points sort after
2251     * supplementary code points because they are stored as pairs of surrogates which are at
2252     * &#92;ud800..&#92;udfff.
2253     * </p>
2254     *
2255     * @see #FOLD_CASE_DEFAULT
2256     * @see #FOLD_CASE_EXCLUDE_SPECIAL_I
2257     */
2258    public static final class StringComparator implements java.util.Comparator<String> {
2259        // public constructor ------------------------------------------------
2260
2261        /**
2262         * Default constructor that does code unit comparison and case sensitive comparison.
2263         */
2264        public StringComparator() {
2265            this(false, false, FOLD_CASE_DEFAULT);
2266        }
2267
2268        /**
2269         * Constructor that does comparison based on the argument options.
2270         *
2271         * @param codepointcompare Flag to indicate true for code point comparison or false for code unit
2272         *            comparison.
2273         * @param ignorecase False for case sensitive comparison, true for case-insensitive comparison
2274         * @param foldcaseoption FOLD_CASE_DEFAULT or FOLD_CASE_EXCLUDE_SPECIAL_I. This option is used only
2275         *            when ignorecase is set to true. If ignorecase is false, this option is
2276         *            ignored.
2277         * @see #FOLD_CASE_DEFAULT
2278         * @see #FOLD_CASE_EXCLUDE_SPECIAL_I
2279         * @throws IllegalArgumentException If foldcaseoption is out of range
2280         */
2281        public StringComparator(boolean codepointcompare, boolean ignorecase, int foldcaseoption) {
2282            setCodePointCompare(codepointcompare);
2283            m_ignoreCase_ = ignorecase;
2284            if (foldcaseoption < FOLD_CASE_DEFAULT || foldcaseoption > FOLD_CASE_EXCLUDE_SPECIAL_I) {
2285                throw new IllegalArgumentException("Invalid fold case option");
2286            }
2287            m_foldCase_ = foldcaseoption;
2288        }
2289
2290        // public data member ------------------------------------------------
2291
2292        /**
2293         * Option value for case folding comparison:
2294         *
2295         * <p>Comparison is case insensitive, strings are folded using default mappings defined in
2296         * Unicode data file CaseFolding.txt, before comparison.
2297         */
2298        public static final int FOLD_CASE_DEFAULT = 0;
2299
2300        /**
2301         * Option value for case folding:
2302         * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
2303         * and dotless i appropriately for Turkic languages (tr, az).
2304         *
2305         * <p>Comparison is case insensitive, strings are folded using modified mappings defined in
2306         * Unicode data file CaseFolding.txt, before comparison.
2307         *
2308         * @see android.icu.lang.UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I
2309         */
2310        public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 1;
2311
2312        // public methods ----------------------------------------------------
2313
2314        // public setters ----------------------------------------------------
2315
2316        /**
2317         * Sets the comparison mode to code point compare if flag is true. Otherwise comparison mode
2318         * is set to code unit compare
2319         *
2320         * @param flag True for code point compare, false for code unit compare
2321         */
2322        public void setCodePointCompare(boolean flag) {
2323            if (flag) {
2324                m_codePointCompare_ = Normalizer.COMPARE_CODE_POINT_ORDER;
2325            } else {
2326                m_codePointCompare_ = 0;
2327            }
2328        }
2329
2330        /**
2331         * Sets the Comparator to case-insensitive comparison mode if argument is true, otherwise
2332         * case sensitive comparison mode if set to false.
2333         *
2334         * @param ignorecase True for case-insitive comparison, false for case sensitive comparison
2335         * @param foldcaseoption FOLD_CASE_DEFAULT or FOLD_CASE_EXCLUDE_SPECIAL_I. This option is used only
2336         *            when ignorecase is set to true. If ignorecase is false, this option is
2337         *            ignored.
2338         * @see #FOLD_CASE_DEFAULT
2339         * @see #FOLD_CASE_EXCLUDE_SPECIAL_I
2340         */
2341        public void setIgnoreCase(boolean ignorecase, int foldcaseoption) {
2342            m_ignoreCase_ = ignorecase;
2343            if (foldcaseoption < FOLD_CASE_DEFAULT || foldcaseoption > FOLD_CASE_EXCLUDE_SPECIAL_I) {
2344                throw new IllegalArgumentException("Invalid fold case option");
2345            }
2346            m_foldCase_ = foldcaseoption;
2347        }
2348
2349        // public getters ----------------------------------------------------
2350
2351        /**
2352         * Checks if the comparison mode is code point compare.
2353         *
2354         * @return true for code point compare, false for code unit compare
2355         */
2356        public boolean getCodePointCompare() {
2357            return m_codePointCompare_ == Normalizer.COMPARE_CODE_POINT_ORDER;
2358        }
2359
2360        /**
2361         * Checks if Comparator is in the case insensitive mode.
2362         *
2363         * @return true if Comparator performs case insensitive comparison, false otherwise
2364         */
2365        public boolean getIgnoreCase() {
2366            return m_ignoreCase_;
2367        }
2368
2369        /**
2370         * Gets the fold case options set in Comparator to be used with case insensitive comparison.
2371         *
2372         * @return either FOLD_CASE_DEFAULT or FOLD_CASE_EXCLUDE_SPECIAL_I
2373         * @see #FOLD_CASE_DEFAULT
2374         * @see #FOLD_CASE_EXCLUDE_SPECIAL_I
2375         */
2376        public int getIgnoreCaseOption() {
2377            return m_foldCase_;
2378        }
2379
2380        // public other methods ----------------------------------------------
2381
2382        /**
2383         * Compare two strings depending on the options selected during construction.
2384         *
2385         * @param a first source string.
2386         * @param b second source string.
2387         * @return 0 returned if a == b. If a &lt; b, a negative value is returned. Otherwise if a &gt; b,
2388         *         a positive value is returned.
2389         * @exception ClassCastException thrown when either a or b is not a String object
2390         */
2391        public int compare(String a, String b) {
2392            if (a == b) {
2393                return 0;
2394            }
2395            if (a == null) {
2396                return -1;
2397            }
2398            if (b == null) {
2399                return 1;
2400            }
2401
2402            if (m_ignoreCase_) {
2403                return compareCaseInsensitive(a, b);
2404            }
2405            return compareCaseSensitive(a, b);
2406        }
2407
2408        // private data member ----------------------------------------------
2409
2410        /**
2411         * Code unit comparison flag. True if code unit comparison is required. False if code point
2412         * comparison is required.
2413         */
2414        private int m_codePointCompare_;
2415
2416        /**
2417         * Fold case comparison option.
2418         */
2419        private int m_foldCase_;
2420
2421        /**
2422         * Flag indicator if ignore case is to be used during comparison
2423         */
2424        private boolean m_ignoreCase_;
2425
2426        /**
2427         * Code point order offset for surrogate characters
2428         */
2429        private static final int CODE_POINT_COMPARE_SURROGATE_OFFSET_ = 0x2800;
2430
2431        // private method ---------------------------------------------------
2432
2433        /**
2434         * Compares case insensitive. This is a direct port of ICU4C, to make maintainence life
2435         * easier.
2436         *
2437         * @param s1
2438         *            first string to compare
2439         * @param s2
2440         *            second string to compare
2441         * @return -1 is s1 &lt; s2, 0 if equals,
2442         */
2443        private int compareCaseInsensitive(String s1, String s2) {
2444            return Normalizer.cmpEquivFold(s1, s2, m_foldCase_ | m_codePointCompare_
2445                    | Normalizer.COMPARE_IGNORE_CASE);
2446        }
2447
2448        /**
2449         * Compares case sensitive. This is a direct port of ICU4C, to make maintainence life
2450         * easier.
2451         *
2452         * @param s1
2453         *            first string to compare
2454         * @param s2
2455         *            second string to compare
2456         * @return -1 is s1 &lt; s2, 0 if equals,
2457         */
2458        private int compareCaseSensitive(String s1, String s2) {
2459            // compare identical prefixes - they do not need to be fixed up
2460            // limit1 = start1 + min(lenght1, length2)
2461            int length1 = s1.length();
2462            int length2 = s2.length();
2463            int minlength = length1;
2464            int result = 0;
2465            if (length1 < length2) {
2466                result = -1;
2467            } else if (length1 > length2) {
2468                result = 1;
2469                minlength = length2;
2470            }
2471
2472            char c1 = 0;
2473            char c2 = 0;
2474            int index = 0;
2475            for (; index < minlength; index++) {
2476                c1 = s1.charAt(index);
2477                c2 = s2.charAt(index);
2478                // check pseudo-limit
2479                if (c1 != c2) {
2480                    break;
2481                }
2482            }
2483
2484            if (index == minlength) {
2485                return result;
2486            }
2487
2488            boolean codepointcompare = m_codePointCompare_ == Normalizer.COMPARE_CODE_POINT_ORDER;
2489            // if both values are in or above the surrogate range, fix them up
2490            if (c1 >= LEAD_SURROGATE_MIN_VALUE && c2 >= LEAD_SURROGATE_MIN_VALUE
2491                    && codepointcompare) {
2492                // subtract 0x2800 from BMP code points to make them smaller
2493                // than supplementary ones
2494                if ((c1 <= LEAD_SURROGATE_MAX_VALUE && (index + 1) != length1 && isTrailSurrogate(s1.charAt(index + 1)))
2495                        || (isTrailSurrogate(c1) && index != 0 && isLeadSurrogate(s1.charAt(index - 1)))) {
2496                    // part of a surrogate pair, leave >=d800
2497                } else {
2498                    // BMP code point - may be surrogate code point - make
2499                    // < d800
2500                    c1 -= CODE_POINT_COMPARE_SURROGATE_OFFSET_;
2501                }
2502
2503                if ((c2 <= LEAD_SURROGATE_MAX_VALUE && (index + 1) != length2 && isTrailSurrogate(s2.charAt(index + 1)))
2504                        || (isTrailSurrogate(c2) && index != 0 && isLeadSurrogate(s2.charAt(index - 1)))) {
2505                    // part of a surrogate pair, leave >=d800
2506                } else {
2507                    // BMP code point - may be surrogate code point - make <d800
2508                    c2 -= CODE_POINT_COMPARE_SURROGATE_OFFSET_;
2509                }
2510            }
2511
2512            // now c1 and c2 are in UTF-32-compatible order
2513            return c1 - c2;
2514        }
2515    }
2516
2517    /**
2518     * Utility for getting a code point from a CharSequence that contains exactly one code point.
2519     * @return the code point IF the string is non-null and consists of a single code point.
2520     * otherwise returns -1.
2521     * @param s to test
2522     */
2523    public static int getSingleCodePoint(CharSequence s) {
2524        if (s == null || s.length() == 0) {
2525            return -1;
2526        } else if (s.length() == 1) {
2527            return s.charAt(0);
2528        } else if (s.length() > 2) {
2529            return -1;
2530        }
2531
2532        // at this point, len = 2
2533        int cp = Character.codePointAt(s, 0);
2534        if (cp > 0xFFFF) { // is surrogate pair
2535            return cp;
2536        }
2537        return -1;
2538    }
2539
2540    /**
2541     * Utility for comparing a code point to a string without having to create a new string. Returns the same results
2542     * as a code point comparison of UTF16.valueOf(codePoint) and s.toString(). More specifically, if
2543     * <pre>
2544     * sc = new StringComparator(true,false,0);
2545     * fast = UTF16.compareCodePoint(codePoint, charSequence)
2546     * slower = sc.compare(UTF16.valueOf(codePoint), charSequence == null ? "" : charSequence.toString())
2547     * </pre>
2548     * then
2549     * <pre>
2550     * Integer.signum(fast) == Integer.signum(slower)
2551     * </pre>
2552     * @param codePoint to test
2553     * @param s to test
2554     * @return equivalent of code point comparator comparing two strings.
2555     */
2556    public static int compareCodePoint(int codePoint, CharSequence s) {
2557        if (s == null) {
2558            return 1;
2559        }
2560        final int strLen = s.length();
2561        if (strLen == 0) {
2562            return 1;
2563        }
2564        int second = Character.codePointAt(s, 0);
2565        int diff = codePoint - second;
2566        if (diff != 0) {
2567            return diff;
2568        }
2569        return strLen == Character.charCount(codePoint) ? 0 : -1;
2570    }
2571
2572    // private data members -------------------------------------------------
2573
2574    /**
2575     * Shift value for lead surrogate to form a supplementary character.
2576     */
2577    private static final int LEAD_SURROGATE_SHIFT_ = 10;
2578
2579    /**
2580     * Mask to retrieve the significant value from a trail surrogate.
2581     */
2582    private static final int TRAIL_SURROGATE_MASK_ = 0x3FF;
2583
2584    /**
2585     * Value that all lead surrogate starts with
2586     */
2587    private static final int LEAD_SURROGATE_OFFSET_ = LEAD_SURROGATE_MIN_VALUE
2588            - (SUPPLEMENTARY_MIN_VALUE >> LEAD_SURROGATE_SHIFT_);
2589
2590    // private methods ------------------------------------------------------
2591
2592    /**
2593     * <p>
2594     * Converts argument code point and returns a String object representing the code point's value
2595     * in UTF16 format.
2596     * </p>
2597     * <p>
2598     * This method does not check for the validity of the codepoint, the results are not guaranteed
2599     * if a invalid codepoint is passed as argument.
2600     * </p>
2601     * <p>
2602     * The result is a string whose length is 1 for non-supplementary code points, 2 otherwise.
2603     * </p>
2604     *
2605     * @param ch
2606     *            code point
2607     * @return string representation of the code point
2608     */
2609    private static String toString(int ch) {
2610        if (ch < SUPPLEMENTARY_MIN_VALUE) {
2611            return String.valueOf((char) ch);
2612        }
2613
2614        StringBuilder result = new StringBuilder();
2615        result.append(getLeadSurrogate(ch));
2616        result.append(getTrailSurrogate(ch));
2617        return result.toString();
2618    }
2619}
2620// eof
2621