1/* GENERATED SOURCE. DO NOT MODIFY. */
2/*
3 *******************************************************************************
4 * Copyright (C) 1996-2015, International Business Machines Corporation and    *
5 * others. All Rights Reserved.                                                *
6 *******************************************************************************
7 */
8package android.icu.impl;
9
10import java.io.IOException;
11import java.util.ArrayList;
12import java.util.Locale;
13import java.util.regex.Pattern;
14
15import android.icu.lang.UCharacter;
16import android.icu.text.Replaceable;
17import android.icu.text.UTF16;
18import android.icu.text.UnicodeMatcher;
19
20/**
21 * @hide Only a subset of ICU is exposed in Android
22 */
23public final class Utility {
24
    // Single-quote character (U+0027).
    private static final char APOSTROPHE = '\'';
    // Backslash character (U+005C).
    private static final char BACKSLASH  = '\\';
    // Only the sign bit of a 32-bit int set; its users are outside this part of the file.
    private static final int MAGIC_UNSIGNED = 0x80000000;
28
29    /**
30     * Convenience utility to compare two Object[]s.
31     * Ought to be in System
32     */
33    public final static boolean arrayEquals(Object[] source, Object target) {
34        if (source == null) return (target == null);
35        if (!(target instanceof Object[])) return false;
36        Object[] targ = (Object[]) target;
37        return (source.length == targ.length
38                && arrayRegionMatches(source, 0, targ, 0, source.length));
39    }
40
41    /**
42     * Convenience utility to compare two int[]s
43     * Ought to be in System
44     */
45    public final static boolean arrayEquals(int[] source, Object target) {
46        if (source == null) return (target == null);
47        if (!(target instanceof int[])) return false;
48        int[] targ = (int[]) target;
49        return (source.length == targ.length
50                && arrayRegionMatches(source, 0, targ, 0, source.length));
51    }
52
53    /**
54     * Convenience utility to compare two double[]s
55     * Ought to be in System
56     */
57    public final static boolean arrayEquals(double[] source, Object target) {
58        if (source == null) return (target == null);
59        if (!(target instanceof double[])) return false;
60        double[] targ = (double[]) target;
61        return (source.length == targ.length
62                && arrayRegionMatches(source, 0, targ, 0, source.length));
63    }
64    public final static boolean arrayEquals(byte[] source, Object target) {
65        if (source == null) return (target == null);
66        if (!(target instanceof byte[])) return false;
67        byte[] targ = (byte[]) target;
68        return (source.length == targ.length
69                && arrayRegionMatches(source, 0, targ, 0, source.length));
70    }
71
72    /**
73     * Convenience utility to compare two Object[]s
74     * Ought to be in System
75     */
76    public final static boolean arrayEquals(Object source, Object target) {
77        if (source == null) return (target == null);
78        // for some reason, the correct arrayEquals is not being called
79        // so do it by hand for now.
80        if (source instanceof Object[])
81            return(arrayEquals((Object[]) source,target));
82        if (source instanceof int[])
83            return(arrayEquals((int[]) source,target));
84        if (source instanceof double[])
85            return(arrayEquals((double[]) source, target));
86        if (source instanceof byte[])
87            return(arrayEquals((byte[]) source,target));
88        return source.equals(target);
89    }
90
91    /**
92     * Convenience utility to compare two Object[]s
93     * Ought to be in System.
94     * @param len the length to compare.
95     * The start indices and start+len must be valid.
96     */
97    public final static boolean arrayRegionMatches(Object[] source, int sourceStart,
98            Object[] target, int targetStart,
99            int len)
100    {
101        int sourceEnd = sourceStart + len;
102        int delta = targetStart - sourceStart;
103        for (int i = sourceStart; i < sourceEnd; i++) {
104            if (!arrayEquals(source[i],target[i + delta]))
105                return false;
106        }
107        return true;
108    }
109
110    /**
111     * Convenience utility to compare two Object[]s
112     * Ought to be in System.
113     * @param len the length to compare.
114     * The start indices and start+len must be valid.
115     */
116    public final static boolean arrayRegionMatches(char[] source, int sourceStart,
117            char[] target, int targetStart,
118            int len)
119    {
120        int sourceEnd = sourceStart + len;
121        int delta = targetStart - sourceStart;
122        for (int i = sourceStart; i < sourceEnd; i++) {
123            if (source[i]!=target[i + delta])
124                return false;
125        }
126        return true;
127    }
128
129    /**
130     * Convenience utility to compare two int[]s.
131     * @param len the length to compare.
132     * The start indices and start+len must be valid.
133     * Ought to be in System
134     */
135    public final static boolean arrayRegionMatches(int[] source, int sourceStart,
136            int[] target, int targetStart,
137            int len)
138    {
139        int sourceEnd = sourceStart + len;
140        int delta = targetStart - sourceStart;
141        for (int i = sourceStart; i < sourceEnd; i++) {
142            if (source[i] != target[i + delta])
143                return false;
144        }
145        return true;
146    }
147
148    /**
149     * Convenience utility to compare two arrays of doubles.
150     * @param len the length to compare.
151     * The start indices and start+len must be valid.
152     * Ought to be in System
153     */
154    public final static boolean arrayRegionMatches(double[] source, int sourceStart,
155            double[] target, int targetStart,
156            int len)
157    {
158        int sourceEnd = sourceStart + len;
159        int delta = targetStart - sourceStart;
160        for (int i = sourceStart; i < sourceEnd; i++) {
161            if (source[i] != target[i + delta])
162                return false;
163        }
164        return true;
165    }
166    public final static boolean arrayRegionMatches(byte[] source, int sourceStart,
167            byte[] target, int targetStart, int len){
168        int sourceEnd = sourceStart + len;
169        int delta = targetStart - sourceStart;
170        for (int i = sourceStart; i < sourceEnd; i++) {
171            if (source[i] != target[i + delta])
172                return false;
173        }
174        return true;
175    }
176
177    /**
178     * Convenience utility. Does null checks on objects, then calls equals.
179     */
180    public final static boolean objectEquals(Object a, Object b) {
181        return a == null ?
182                b == null ? true : false :
183                    b == null ? false : a.equals(b);
184    }
185
186    /**
187     * Convenience utility. Does null checks on objects, then calls compare.
188     */
189    public static <T extends Comparable<T>> int checkCompare(T a, T b) {
190        return a == null ?
191                b == null ? 0 : -1 :
192                    b == null ? 1 : a.compareTo(b);
193      }
194
195    /**
196     * Convenience utility. Does null checks on object, then calls hashCode.
197     */
198    public static int checkHash(Object a) {
199        return a == null ? 0 : a.hashCode();
200      }
201
    /**
     * The ESCAPE character is used during run-length encoding of int, short
     * and char arrays.  In the encoded form it introduces either an escaped
     * literal (ESCAPE ESCAPE) or a run of identical values (ESCAPE n c).
     */
    private static final char ESCAPE = '\uA5A5';

    /**
     * The ESCAPE_BYTE value is used during run-length encoding of byte
     * arrays.  It signals a run of identical bytes.  Note that 0xA5 cast to
     * a Java byte is a negative value, so comparisons against int quantities
     * must mask it to the unsigned range first.
     */
    static final byte ESCAPE_BYTE = (byte)0xA5;
213
214    /**
215     * Construct a string representing an int array.  Use run-length encoding.
216     * A character represents itself, unless it is the ESCAPE character.  Then
217     * the following notations are possible:
218     *   ESCAPE ESCAPE   ESCAPE literal
219     *   ESCAPE n c      n instances of character c
220     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
221     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
222     * If we encounter a run where n == ESCAPE, we represent this as:
223     *   c ESCAPE n-1 c
224     * The ESCAPE value is chosen so as not to collide with commonly
225     * seen values.
226     */
227    static public final String arrayToRLEString(int[] a) {
228        StringBuilder buffer = new StringBuilder();
229
230        appendInt(buffer, a.length);
231        int runValue = a[0];
232        int runLength = 1;
233        for (int i=1; i<a.length; ++i) {
234            int s = a[i];
235            if (s == runValue && runLength < 0xFFFF) {
236                ++runLength;
237            } else {
238                encodeRun(buffer, runValue, runLength);
239                runValue = s;
240                runLength = 1;
241            }
242        }
243        encodeRun(buffer, runValue, runLength);
244        return buffer.toString();
245    }
246
247    /**
248     * Construct a string representing a short array.  Use run-length encoding.
249     * A character represents itself, unless it is the ESCAPE character.  Then
250     * the following notations are possible:
251     *   ESCAPE ESCAPE   ESCAPE literal
252     *   ESCAPE n c      n instances of character c
253     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
254     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
255     * If we encounter a run where n == ESCAPE, we represent this as:
256     *   c ESCAPE n-1 c
257     * The ESCAPE value is chosen so as not to collide with commonly
258     * seen values.
259     */
260    static public final String arrayToRLEString(short[] a) {
261        StringBuilder buffer = new StringBuilder();
262        // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);
263        buffer.append((char) (a.length >> 16));
264        buffer.append((char) a.length);
265        short runValue = a[0];
266        int runLength = 1;
267        for (int i=1; i<a.length; ++i) {
268            short s = a[i];
269            if (s == runValue && runLength < 0xFFFF) ++runLength;
270            else {
271                encodeRun(buffer, runValue, runLength);
272                runValue = s;
273                runLength = 1;
274            }
275        }
276        encodeRun(buffer, runValue, runLength);
277        return buffer.toString();
278    }
279
280    /**
281     * Construct a string representing a char array.  Use run-length encoding.
282     * A character represents itself, unless it is the ESCAPE character.  Then
283     * the following notations are possible:
284     *   ESCAPE ESCAPE   ESCAPE literal
285     *   ESCAPE n c      n instances of character c
286     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
287     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
288     * If we encounter a run where n == ESCAPE, we represent this as:
289     *   c ESCAPE n-1 c
290     * The ESCAPE value is chosen so as not to collide with commonly
291     * seen values.
292     */
293    static public final String arrayToRLEString(char[] a) {
294        StringBuilder buffer = new StringBuilder();
295        buffer.append((char) (a.length >> 16));
296        buffer.append((char) a.length);
297        char runValue = a[0];
298        int runLength = 1;
299        for (int i=1; i<a.length; ++i) {
300            char s = a[i];
301            if (s == runValue && runLength < 0xFFFF) ++runLength;
302            else {
303                encodeRun(buffer, (short)runValue, runLength);
304                runValue = s;
305                runLength = 1;
306            }
307        }
308        encodeRun(buffer, (short)runValue, runLength);
309        return buffer.toString();
310    }
311
312    /**
313     * Construct a string representing a byte array.  Use run-length encoding.
314     * Two bytes are packed into a single char, with a single extra zero byte at
315     * the end if needed.  A byte represents itself, unless it is the
316     * ESCAPE_BYTE.  Then the following notations are possible:
317     *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
318     *   ESCAPE_BYTE n b           n instances of byte b
319     * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
320     * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
321     * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
322     *   b ESCAPE_BYTE n-1 b
323     * The ESCAPE_BYTE value is chosen so as not to collide with commonly
324     * seen values.
325     */
326    static public final String arrayToRLEString(byte[] a) {
327        StringBuilder buffer = new StringBuilder();
328        buffer.append((char) (a.length >> 16));
329        buffer.append((char) a.length);
330        byte runValue = a[0];
331        int runLength = 1;
332        byte[] state = new byte[2];
333        for (int i=1; i<a.length; ++i) {
334            byte b = a[i];
335            if (b == runValue && runLength < 0xFF) ++runLength;
336            else {
337                encodeRun(buffer, runValue, runLength, state);
338                runValue = b;
339                runLength = 1;
340            }
341        }
342        encodeRun(buffer, runValue, runLength, state);
343
344        // We must save the final byte, if there is one, by padding
345        // an extra zero.
346        if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);
347
348        return buffer.toString();
349    }
350
351    /**
352     * Encode a run, possibly a degenerate run (of < 4 values).
353     * @param length The length of the run; must be > 0 && <= 0xFFFF.
354     */
355    private static final <T extends Appendable> void encodeRun(T buffer, int value, int length) {
356        if (length < 4) {
357            for (int j=0; j<length; ++j) {
358                if (value == ESCAPE) {
359                    appendInt(buffer, value);
360                }
361                appendInt(buffer, value);
362            }
363        }
364        else {
365            if (length == (int) ESCAPE) {
366                if (value == (int) ESCAPE) {
367                    appendInt(buffer, ESCAPE);
368                }
369                appendInt(buffer, value);
370                --length;
371            }
372            appendInt(buffer, ESCAPE);
373            appendInt(buffer, length);
374            appendInt(buffer, value); // Don't need to escape this value
375        }
376    }
377
378    private static final <T extends Appendable> void appendInt(T buffer, int value) {
379        try {
380            buffer.append((char)(value >>> 16));
381            buffer.append((char)(value & 0xFFFF));
382        } catch (IOException e) {
383            throw new IllegalIcuArgumentException(e);
384        }
385    }
386
387    /**
388     * Encode a run, possibly a degenerate run (of < 4 values).
389     * @param length The length of the run; must be > 0 && <= 0xFFFF.
390     */
391    private static final <T extends Appendable> void encodeRun(T buffer, short value, int length) {
392        try {
393            if (length < 4) {
394                for (int j=0; j<length; ++j) {
395                    if (value == (int) ESCAPE)
396                        buffer.append(ESCAPE);
397                    buffer.append((char) value);
398                }
399            }
400            else {
401                if (length == (int) ESCAPE) {
402                    if (value == (int) ESCAPE) buffer.append(ESCAPE);
403                    buffer.append((char) value);
404                    --length;
405                }
406                buffer.append(ESCAPE);
407                buffer.append((char) length);
408                buffer.append((char) value); // Don't need to escape this value
409            }
410        } catch (IOException e) {
411            throw new IllegalIcuArgumentException(e);
412        }
413    }
414
415    /**
416     * Encode a run, possibly a degenerate run (of < 4 values).
417     * @param length The length of the run; must be > 0 && <= 0xFF.
418     */
419    private static final <T extends Appendable> void encodeRun(T buffer, byte value, int length,
420            byte[] state) {
421        if (length < 4) {
422            for (int j=0; j<length; ++j) {
423                if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
424                appendEncodedByte(buffer, value, state);
425            }
426        }
427        else {
428            if (length == ESCAPE_BYTE) {
429                if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
430                appendEncodedByte(buffer, value, state);
431                --length;
432            }
433            appendEncodedByte(buffer, ESCAPE_BYTE, state);
434            appendEncodedByte(buffer, (byte)length, state);
435            appendEncodedByte(buffer, value, state); // Don't need to escape this value
436        }
437    }
438
439    /**
440     * Append a byte to the given Appendable, packing two bytes into each
441     * character.  The state parameter maintains intermediary data between
442     * calls.
443     * @param state A two-element array, with state[0] == 0 if this is the
444     * first byte of a pair, or state[0] != 0 if this is the second byte
445     * of a pair, in which case state[1] is the first byte.
446     */
447    private static final <T extends Appendable> void appendEncodedByte(T buffer, byte value,
448            byte[] state) {
449        try {
450            if (state[0] != 0) {
451                char c = (char) ((state[1] << 8) | (((int) value) & 0xFF));
452                buffer.append(c);
453                state[0] = 0;
454            }
455            else {
456                state[0] = 1;
457                state[1] = value;
458            }
459        } catch (IOException e) {
460            throw new IllegalIcuArgumentException(e);
461        }
462    }
463
464    /**
465     * Construct an array of ints from a run-length encoded string.
466     */
467    static public final int[] RLEStringToIntArray(String s) {
468        int length = getInt(s, 0);
469        int[] array = new int[length];
470        int ai = 0, i = 1;
471
472        int maxI = s.length() / 2;
473        while (ai < length && i < maxI) {
474            int c = getInt(s, i++);
475
476            if (c == ESCAPE) {
477                c = getInt(s, i++);
478                if (c == ESCAPE) {
479                    array[ai++] = c;
480                } else {
481                    int runLength = c;
482                    int runValue = getInt(s, i++);
483                    for (int j=0; j<runLength; ++j) {
484                        array[ai++] = runValue;
485                    }
486                }
487            }
488            else {
489                array[ai++] = c;
490            }
491        }
492
493        if (ai != length || i != maxI) {
494            throw new IllegalStateException("Bad run-length encoded int array");
495        }
496
497        return array;
498    }
499    static final int getInt(String s, int i) {
500        return (((int) s.charAt(2*i)) << 16) | (int) s.charAt(2*i+1);
501    }
502
503    /**
504     * Construct an array of shorts from a run-length encoded string.
505     */
506    static public final short[] RLEStringToShortArray(String s) {
507        int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
508        short[] array = new short[length];
509        int ai = 0;
510        for (int i=2; i<s.length(); ++i) {
511            char c = s.charAt(i);
512            if (c == ESCAPE) {
513                c = s.charAt(++i);
514                if (c == ESCAPE) {
515                    array[ai++] = (short) c;
516                } else {
517                    int runLength = (int) c;
518                    short runValue = (short) s.charAt(++i);
519                    for (int j=0; j<runLength; ++j) array[ai++] = runValue;
520                }
521            }
522            else {
523                array[ai++] = (short) c;
524            }
525        }
526
527        if (ai != length)
528            throw new IllegalStateException("Bad run-length encoded short array");
529
530        return array;
531    }
532
533    /**
534     * Construct an array of shorts from a run-length encoded string.
535     */
536    static public final char[] RLEStringToCharArray(String s) {
537        int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
538        char[] array = new char[length];
539        int ai = 0;
540        for (int i=2; i<s.length(); ++i) {
541            char c = s.charAt(i);
542            if (c == ESCAPE) {
543                c = s.charAt(++i);
544                if (c == ESCAPE) {
545                    array[ai++] = c;
546                } else {
547                    int runLength = (int) c;
548                    char runValue = s.charAt(++i);
549                    for (int j=0; j<runLength; ++j) array[ai++] = runValue;
550                }
551            }
552            else {
553                array[ai++] = c;
554            }
555        }
556
557        if (ai != length)
558            throw new IllegalStateException("Bad run-length encoded short array");
559
560        return array;
561    }
562
563    /**
564     * Construct an array of bytes from a run-length encoded string.
565     */
566    static public final byte[] RLEStringToByteArray(String s) {
567        int length = (((int) s.charAt(0)) << 16) | ((int) s.charAt(1));
568        byte[] array = new byte[length];
569        boolean nextChar = true;
570        char c = 0;
571        int node = 0;
572        int runLength = 0;
573        int i = 2;
574        for (int ai=0; ai<length; ) {
575            // This part of the loop places the next byte into the local
576            // variable 'b' each time through the loop.  It keeps the
577            // current character in 'c' and uses the boolean 'nextChar'
578            // to see if we've taken both bytes out of 'c' yet.
579            byte b;
580            if (nextChar) {
581                c = s.charAt(i++);
582                b = (byte) (c >> 8);
583                nextChar = false;
584            }
585            else {
586                b = (byte) (c & 0xFF);
587                nextChar = true;
588            }
589
590            // This part of the loop is a tiny state machine which handles
591            // the parsing of the run-length encoding.  This would be simpler
592            // if we could look ahead, but we can't, so we use 'node' to
593            // move between three nodes in the state machine.
594            switch (node) {
595            case 0:
596                // Normal idle node
597                if (b == ESCAPE_BYTE) {
598                    node = 1;
599                }
600                else {
601                    array[ai++] = b;
602                }
603                break;
604            case 1:
605                // We have seen one ESCAPE_BYTE; we expect either a second
606                // one, or a run length and value.
607                if (b == ESCAPE_BYTE) {
608                    array[ai++] = ESCAPE_BYTE;
609                    node = 0;
610                }
611                else {
612                    runLength = b;
613                    // Interpret signed byte as unsigned
614                    if (runLength < 0) runLength += 0x100;
615                    node = 2;
616                }
617                break;
618            case 2:
619                // We have seen an ESCAPE_BYTE and length byte.  We interpret
620                // the next byte as the value to be repeated.
621                for (int j=0; j<runLength; ++j) array[ai++] = b;
622                node = 0;
623                break;
624            }
625        }
626
627        if (node != 0)
628            throw new IllegalStateException("Bad run-length encoded byte array");
629
630        if (i != s.length())
631            throw new IllegalStateException("Excess data in RLE byte array string");
632
633        return array;
634    }
635
    // Value of the "line.separator" system property, read once at class
    // initialization.  The field is public and not final, so other code can reassign it.
    static public String LINE_SEPARATOR = System.getProperty("line.separator");
637
638    /**
639     * Format a String for representation in a source file.  This includes
640     * breaking it into lines and escaping characters using octal notation
641     * when necessary (control characters and double quotes).
642     */
643    static public final String formatForSource(String s) {
644        StringBuilder buffer = new StringBuilder();
645        for (int i=0; i<s.length();) {
646            if (i > 0) buffer.append('+').append(LINE_SEPARATOR);
647            buffer.append("        \"");
648            int count = 11;
649            while (i<s.length() && count<80) {
650                char c = s.charAt(i++);
651                if (c < '\u0020' || c == '"' || c == '\\') {
652                    if (c == '\n') {
653                        buffer.append("\\n");
654                        count += 2;
655                    } else if (c == '\t') {
656                        buffer.append("\\t");
657                        count += 2;
658                    } else if (c == '\r') {
659                        buffer.append("\\r");
660                        count += 2;
661                    } else {
662                        // Represent control characters, backslash and double quote
663                        // using octal notation; otherwise the string we form
664                        // won't compile, since Unicode escape sequences are
665                        // processed before tokenization.
666                        buffer.append('\\');
667                        buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
668                        buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
669                        buffer.append(HEX_DIGIT[(c & 0007)]);
670                        count += 4;
671                    }
672                }
673                else if (c <= '\u007E') {
674                    buffer.append(c);
675                    count += 1;
676                }
677                else {
678                    buffer.append("\\u");
679                    buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
680                    buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
681                    buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
682                    buffer.append(HEX_DIGIT[(c & 0x000F)]);
683                    count += 6;
684                }
685            }
686            buffer.append('"');
687        }
688        return buffer.toString();
689    }
690
    // Upper-case hexadecimal digits indexed by value 0..15.  The first eight
    // entries double as the octal digits.
    static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',
        '8','9','A','B','C','D','E','F'};
693
694    /**
695     * Format a String for representation in a source file.  Like
696     * formatForSource but does not do line breaking.
697     */
698    static public final String format1ForSource(String s) {
699        StringBuilder buffer = new StringBuilder();
700        buffer.append("\"");
701        for (int i=0; i<s.length();) {
702            char c = s.charAt(i++);
703            if (c < '\u0020' || c == '"' || c == '\\') {
704                if (c == '\n') {
705                    buffer.append("\\n");
706                } else if (c == '\t') {
707                    buffer.append("\\t");
708                } else if (c == '\r') {
709                    buffer.append("\\r");
710                } else {
711                    // Represent control characters, backslash and double quote
712                    // using octal notation; otherwise the string we form
713                    // won't compile, since Unicode escape sequences are
714                    // processed before tokenization.
715                    buffer.append('\\');
716                    buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
717                    buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
718                    buffer.append(HEX_DIGIT[(c & 0007)]);
719                }
720            }
721            else if (c <= '\u007E') {
722                buffer.append(c);
723            }
724            else {
725                buffer.append("\\u");
726                buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
727                buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
728                buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
729                buffer.append(HEX_DIGIT[(c & 0x000F)]);
730            }
731        }
732        buffer.append('"');
733        return buffer.toString();
734    }
735
736    /**
737     * Convert characters outside the range U+0020 to U+007F to
738     * Unicode escapes, and convert backslash to a double backslash.
739     */
740    public static final String escape(String s) {
741        StringBuilder buf = new StringBuilder();
742        for (int i=0; i<s.length(); ) {
743            int c = Character.codePointAt(s, i);
744            i += UTF16.getCharCount(c);
745            if (c >= ' ' && c <= 0x007F) {
746                if (c == '\\') {
747                    buf.append("\\\\"); // That is, "\\"
748                } else {
749                    buf.append((char)c);
750                }
751            } else {
752                boolean four = c <= 0xFFFF;
753                buf.append(four ? "\\u" : "\\U");
754                buf.append(hex(c, four ? 4 : 8));
755            }
756        }
757        return buf.toString();
758    }
759
    /*
     * Flat table of pairs: an escape letter followed by the character value
     * it stands for (for example 'n' maps to 0x0a).
     * This map must be in ASCENDING ORDER OF THE ESCAPE CODE.
     * The entries for quote, apostrophe, question mark and backslash are
     * commented out; each would map the character to itself.
     */
    static private final char[] UNESCAPE_MAP = {
        /*"   0x22, 0x22 */
        /*'   0x27, 0x27 */
        /*?   0x3F, 0x3F */
        /*\   0x5C, 0x5C */
        /*a*/ 0x61, 0x07,
        /*b*/ 0x62, 0x08,
        /*e*/ 0x65, 0x1b,
        /*f*/ 0x66, 0x0c,
        /*n*/ 0x6E, 0x0a,
        /*r*/ 0x72, 0x0d,
        /*t*/ 0x74, 0x09,
        /*v*/ 0x76, 0x0b
    };
775
776    /**
777     * Convert an escape to a 32-bit code point value.  We attempt
778     * to parallel the icu4c unescapeAt() function.
779     * @param offset16 an array containing offset to the character
780     * <em>after</em> the backslash.  Upon return offset16[0] will
781     * be updated to point after the escape sequence.
782     * @return character value from 0 to 10FFFF, or -1 on error.
783     */
784    public static int unescapeAt(String s, int[] offset16) {
785        int c;
786        int result = 0;
787        int n = 0;
788        int minDig = 0;
789        int maxDig = 0;
790        int bitsPerDigit = 4;
791        int dig;
792        int i;
793        boolean braces = false;
794
795        /* Check that offset is in range */
796        int offset = offset16[0];
797        int length = s.length();
798        if (offset < 0 || offset >= length) {
799            return -1;
800        }
801
802        /* Fetch first UChar after '\\' */
803        c = Character.codePointAt(s, offset);
804        offset += UTF16.getCharCount(c);
805
806        /* Convert hexadecimal and octal escapes */
807        switch (c) {
808        case 'u':
809            minDig = maxDig = 4;
810            break;
811        case 'U':
812            minDig = maxDig = 8;
813            break;
814        case 'x':
815            minDig = 1;
816            if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
817                ++offset;
818                braces = true;
819                maxDig = 8;
820            } else {
821                maxDig = 2;
822            }
823            break;
824        default:
825            dig = UCharacter.digit(c, 8);
826            if (dig >= 0) {
827                minDig = 1;
828                maxDig = 3;
829                n = 1; /* Already have first octal digit */
830                bitsPerDigit = 3;
831                result = dig;
832            }
833            break;
834        }
835        if (minDig != 0) {
836            while (offset < length && n < maxDig) {
837                c = UTF16.charAt(s, offset);
838                dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
839                if (dig < 0) {
840                    break;
841                }
842                result = (result << bitsPerDigit) | dig;
843                offset += UTF16.getCharCount(c);
844                ++n;
845            }
846            if (n < minDig) {
847                return -1;
848            }
849            if (braces) {
850                if (c != 0x7D /*}*/) {
851                    return -1;
852                }
853                ++offset;
854            }
855            if (result < 0 || result >= 0x110000) {
856                return -1;
857            }
858            // If an escape sequence specifies a lead surrogate, see
859            // if there is a trail surrogate after it, either as an
860            // escape or as a literal.  If so, join them up into a
861            // supplementary.
862            if (offset < length &&
863                    UTF16.isLeadSurrogate((char) result)) {
864                int ahead = offset+1;
865                c = s.charAt(offset); // [sic] get 16-bit code unit
866                if (c == '\\' && ahead < length) {
867                    int o[] = new int[] { ahead };
868                    c = unescapeAt(s, o);
869                    ahead = o[0];
870                }
871                if (UTF16.isTrailSurrogate((char) c)) {
872                    offset = ahead;
873                    result = Character.toCodePoint((char) result, (char) c);
874                }
875            }
876            offset16[0] = offset;
877            return result;
878        }
879
880        /* Convert C-style escapes in table */
881        for (i=0; i<UNESCAPE_MAP.length; i+=2) {
882            if (c == UNESCAPE_MAP[i]) {
883                offset16[0] = offset;
884                return UNESCAPE_MAP[i+1];
885            } else if (c < UNESCAPE_MAP[i]) {
886                break;
887            }
888        }
889
890        /* Map \cX to control-X: X & 0x1F */
891        if (c == 'c' && offset < length) {
892            c = UTF16.charAt(s, offset);
893            offset16[0] = offset + UTF16.getCharCount(c);
894            return 0x1F & c;
895        }
896
897        /* If no special forms are recognized, then consider
898         * the backslash to generically escape the next character. */
899        offset16[0] = offset;
900        return c;
901    }
902
903    /**
904     * Convert all escapes in a given string using unescapeAt().
905     * @exception IllegalArgumentException if an invalid escape is
906     * seen.
907     */
908    public static String unescape(String s) {
909        StringBuilder buf = new StringBuilder();
910        int[] pos = new int[1];
911        for (int i=0; i<s.length(); ) {
912            char c = s.charAt(i++);
913            if (c == '\\') {
914                pos[0] = i;
915                int e = unescapeAt(s, pos);
916                if (e < 0) {
917                    throw new IllegalArgumentException("Invalid escape sequence " +
918                            s.substring(i-1, Math.min(i+8, s.length())));
919                }
920                buf.appendCodePoint(e);
921                i = pos[0];
922            } else {
923                buf.append(c);
924            }
925        }
926        return buf.toString();
927    }
928
929    /**
930     * Convert all escapes in a given string using unescapeAt().
931     * Leave invalid escape sequences unchanged.
932     */
933    public static String unescapeLeniently(String s) {
934        StringBuilder buf = new StringBuilder();
935        int[] pos = new int[1];
936        for (int i=0; i<s.length(); ) {
937            char c = s.charAt(i++);
938            if (c == '\\') {
939                pos[0] = i;
940                int e = unescapeAt(s, pos);
941                if (e < 0) {
942                    buf.append(c);
943                } else {
944                    buf.appendCodePoint(e);
945                    i = pos[0];
946                }
947            } else {
948                buf.append(c);
949            }
950        }
951        return buf.toString();
952    }
953
954    /**
955     * Convert a char to 4 hex uppercase digits.  E.g., hex('a') =>
956     * "0041".
957     */
958    public static String hex(long ch) {
959        return hex(ch, 4);
960    }
961
962    /**
963     * Supplies a zero-padded hex representation of an integer (without 0x)
964     */
965    static public String hex(long i, int places) {
966        if (i == Long.MIN_VALUE) return "-8000000000000000";
967        boolean negative = i < 0;
968        if (negative) {
969            i = -i;
970        }
971        String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH);
972        if (result.length() < places) {
973            result = "0000000000000000".substring(result.length(),places) + result;
974        }
975        if (negative) {
976            return '-' + result;
977        }
978        return result;
979    }
980
981    /**
982     * Convert a string to comma-separated groups of 4 hex uppercase
983     * digits.  E.g., hex('ab') => "0041,0042".
984     */
985    public static String hex(CharSequence s) {
986        return hex(s, 4, ",", true, new StringBuilder()).toString();
987    }
988
989    /**
990     * Convert a string to separated groups of hex uppercase
991     * digits.  E.g., hex('ab'...) => "0041,0042".  Append the output
992     * to the given Appendable.
993     */
994    public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
995        try {
996            if (useCodePoints) {
997                int cp;
998                for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
999                    cp = Character.codePointAt(s, i);
1000                    if (i != 0) {
1001                        result.append(separator);
1002                    }
1003                    result.append(hex(cp,width));
1004                }
1005            } else {
1006                for (int i = 0; i < s.length(); ++i) {
1007                    if (i != 0) {
1008                        result.append(separator);
1009                    }
1010                    result.append(hex(s.charAt(i),width));
1011                }
1012            }
1013            return result;
1014        } catch (IOException e) {
1015            throw new IllegalIcuArgumentException(e);
1016        }
1017    }
1018
1019    public static String hex(byte[] o, int start, int end, String separator) {
1020        StringBuilder result = new StringBuilder();
1021        //int ch;
1022        for (int i = start; i < end; ++i) {
1023          if (i != 0) result.append(separator);
1024          result.append(hex(o[i]));
1025        }
1026        return result.toString();
1027      }
1028
1029    /**
1030     * Convert a string to comma-separated groups of 4 hex uppercase
1031     * digits.  E.g., hex('ab') => "0041,0042".
1032     */
1033    public static <S extends CharSequence> String hex(S s, int width, S separator) {
1034        return hex(s, width, separator, true, new StringBuilder()).toString();
1035    }
1036
1037    /**
1038     * Split a string into pieces based on the given divider character
1039     * @param s the string to split
1040     * @param divider the character on which to split.  Occurrences of
1041     * this character are not included in the output
1042     * @param output an array to receive the substrings between
1043     * instances of divider.  It must be large enough on entry to
1044     * accomodate all output.  Adjacent instances of the divider
1045     * character will place empty strings into output.  Before
1046     * returning, output is padded out with empty strings.
1047     */
1048    public static void split(String s, char divider, String[] output) {
1049        int last = 0;
1050        int current = 0;
1051        int i;
1052        for (i = 0; i < s.length(); ++i) {
1053            if (s.charAt(i) == divider) {
1054                output[current++] = s.substring(last,i);
1055                last = i+1;
1056            }
1057        }
1058        output[current++] = s.substring(last,i);
1059        while (current < output.length) {
1060            output[current++] = "";
1061        }
1062    }
1063
1064    /**
1065     * Split a string into pieces based on the given divider character
1066     * @param s the string to split
1067     * @param divider the character on which to split.  Occurrences of
1068     * this character are not included in the output
1069     * @return output an array to receive the substrings between
1070     * instances of divider. Adjacent instances of the divider
1071     * character will place empty strings into output.
1072     */
1073    public static String[] split(String s, char divider) {
1074        int last = 0;
1075        int i;
1076        ArrayList<String> output = new ArrayList<String>();
1077        for (i = 0; i < s.length(); ++i) {
1078            if (s.charAt(i) == divider) {
1079                output.add(s.substring(last,i));
1080                last = i+1;
1081            }
1082        }
1083        output.add( s.substring(last,i));
1084        return output.toArray(new String[output.size()]);
1085    }
1086
1087    /**
1088     * Look up a given string in a string array.  Returns the index at
1089     * which the first occurrence of the string was found in the
1090     * array, or -1 if it was not found.
1091     * @param source the string to search for
1092     * @param target the array of zero or more strings in which to
1093     * look for source
1094     * @return the index of target at which source first occurs, or -1
1095     * if not found
1096     */
1097    public static int lookup(String source, String[] target) {
1098        for (int i = 0; i < target.length; ++i) {
1099            if (source.equals(target[i])) return i;
1100        }
1101        return -1;
1102    }
1103
1104    /**
1105     * Parse a single non-whitespace character 'ch', optionally
1106     * preceded by whitespace.
1107     * @param id the string to be parsed
1108     * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
1109     * offset of the first character to be parsed.  On output, pos[0]
1110     * is the index after the last parsed character.  If the parse
1111     * fails, pos[0] will be unchanged.
1112     * @param ch the non-whitespace character to be parsed.
1113     * @return true if 'ch' is seen preceded by zero or more
1114     * whitespace characters.
1115     */
1116    public static boolean parseChar(String id, int[] pos, char ch) {
1117        int start = pos[0];
1118        pos[0] = PatternProps.skipWhiteSpace(id, pos[0]);
1119        if (pos[0] == id.length() ||
1120                id.charAt(pos[0]) != ch) {
1121            pos[0] = start;
1122            return false;
1123        }
1124        ++pos[0];
1125        return true;
1126    }
1127
1128    /**
1129     * Parse a pattern string starting at offset pos.  Keywords are
1130     * matched case-insensitively.  Spaces may be skipped and may be
1131     * optional or required.  Integer values may be parsed, and if
1132     * they are, they will be returned in the given array.  If
1133     * successful, the offset of the next non-space character is
1134     * returned.  On failure, -1 is returned.
1135     * @param pattern must only contain lowercase characters, which
1136     * will match their uppercase equivalents as well.  A space
1137     * character matches one or more required spaces.  A '~' character
1138     * matches zero or more optional spaces.  A '#' character matches
1139     * an integer and stores it in parsedInts, which the caller must
1140     * ensure has enough capacity.
1141     * @param parsedInts array to receive parsed integers.  Caller
1142     * must ensure that parsedInts.length is >= the number of '#'
1143     * signs in 'pattern'.
1144     * @return the position after the last character parsed, or -1 if
1145     * the parse failed
1146     */
1147    @SuppressWarnings("fallthrough")
1148    public static int parsePattern(String rule, int pos, int limit,
1149            String pattern, int[] parsedInts) {
1150        // TODO Update this to handle surrogates
1151        int[] p = new int[1];
1152        int intCount = 0; // number of integers parsed
1153        for (int i=0; i<pattern.length(); ++i) {
1154            char cpat = pattern.charAt(i);
1155            char c;
1156            switch (cpat) {
1157            case ' ':
1158                if (pos >= limit) {
1159                    return -1;
1160                }
1161                c = rule.charAt(pos++);
1162                if (!PatternProps.isWhiteSpace(c)) {
1163                    return -1;
1164                }
1165                // FALL THROUGH to skipWhitespace
1166            case '~':
1167                pos = PatternProps.skipWhiteSpace(rule, pos);
1168                break;
1169            case '#':
1170                p[0] = pos;
1171                parsedInts[intCount++] = parseInteger(rule, p, limit);
1172                if (p[0] == pos) {
1173                    // Syntax error; failed to parse integer
1174                    return -1;
1175                }
1176                pos = p[0];
1177                break;
1178            default:
1179                if (pos >= limit) {
1180                    return -1;
1181                }
1182                c = (char) UCharacter.toLowerCase(rule.charAt(pos++));
1183                if (c != cpat) {
1184                    return -1;
1185                }
1186                break;
1187            }
1188        }
1189        return pos;
1190    }
1191
1192    /**
1193     * Parse a pattern string within the given Replaceable and a parsing
1194     * pattern.  Characters are matched literally and case-sensitively
1195     * except for the following special characters:
1196     *
1197     * ~  zero or more Pattern_White_Space chars
1198     *
1199     * If end of pattern is reached with all matches along the way,
1200     * pos is advanced to the first unparsed index and returned.
1201     * Otherwise -1 is returned.
1202     * @param pat pattern that controls parsing
1203     * @param text text to be parsed, starting at index
1204     * @param index offset to first character to parse
1205     * @param limit offset after last character to parse
1206     * @return index after last parsed character, or -1 on parse failure.
1207     */
1208    public static int parsePattern(String pat,
1209            Replaceable text,
1210            int index,
1211            int limit) {
1212        int ipat = 0;
1213
1214        // empty pattern matches immediately
1215        if (ipat == pat.length()) {
1216            return index;
1217        }
1218
1219        int cpat = Character.codePointAt(pat, ipat);
1220
1221        while (index < limit) {
1222            int c = text.char32At(index);
1223
1224            // parse \s*
1225            if (cpat == '~') {
1226                if (PatternProps.isWhiteSpace(c)) {
1227                    index += UTF16.getCharCount(c);
1228                    continue;
1229                } else {
1230                    if (++ipat == pat.length()) {
1231                        return index; // success; c unparsed
1232                    }
1233                    // fall thru; process c again with next cpat
1234                }
1235            }
1236
1237            // parse literal
1238            else if (c == cpat) {
1239                int n = UTF16.getCharCount(c);
1240                index += n;
1241                ipat += n;
1242                if (ipat == pat.length()) {
1243                    return index; // success; c parsed
1244                }
1245                // fall thru; get next cpat
1246            }
1247
1248            // match failure of literal
1249            else {
1250                return -1;
1251            }
1252
1253            cpat = UTF16.charAt(pat, ipat);
1254        }
1255
1256        return -1; // text ended before end of pat
1257    }
1258
1259    /**
1260     * Parse an integer at pos, either of the form \d+ or of the form
1261     * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
1262     * or octal format.
1263     * @param pos INPUT-OUTPUT parameter.  On input, the first
1264     * character to parse.  On output, the character after the last
1265     * parsed character.
1266     */
1267    public static int parseInteger(String rule, int[] pos, int limit) {
1268        int count = 0;
1269        int value = 0;
1270        int p = pos[0];
1271        int radix = 10;
1272
1273        if (rule.regionMatches(true, p, "0x", 0, 2)) {
1274            p += 2;
1275            radix = 16;
1276        } else if (p < limit && rule.charAt(p) == '0') {
1277            p++;
1278            count = 1;
1279            radix = 8;
1280        }
1281
1282        while (p < limit) {
1283            int d = UCharacter.digit(rule.charAt(p++), radix);
1284            if (d < 0) {
1285                --p;
1286                break;
1287            }
1288            ++count;
1289            int v = (value * radix) + d;
1290            if (v <= value) {
1291                // If there are too many input digits, at some point
1292                // the value will go negative, e.g., if we have seen
1293                // "0x8000000" already and there is another '0', when
1294                // we parse the next 0 the value will go negative.
1295                return 0;
1296            }
1297            value = v;
1298        }
1299        if (count > 0) {
1300            pos[0] = p;
1301        }
1302        return value;
1303    }
1304
1305    /**
1306     * Parse a Unicode identifier from the given string at the given
1307     * position.  Return the identifier, or null if there is no
1308     * identifier.
1309     * @param str the string to parse
1310     * @param pos INPUT-OUPUT parameter.  On INPUT, pos[0] is the
1311     * first character to examine.  It must be less than str.length(),
1312     * and it must not point to a whitespace character.  That is, must
1313     * have pos[0] < str.length().  On
1314     * OUTPUT, the position after the last parsed character.
1315     * @return the Unicode identifier, or null if there is no valid
1316     * identifier at pos[0].
1317     */
1318    public static String parseUnicodeIdentifier(String str, int[] pos) {
1319        // assert(pos[0] < str.length());
1320        StringBuilder buf = new StringBuilder();
1321        int p = pos[0];
1322        while (p < str.length()) {
1323            int ch = Character.codePointAt(str, p);
1324            if (buf.length() == 0) {
1325                if (UCharacter.isUnicodeIdentifierStart(ch)) {
1326                    buf.appendCodePoint(ch);
1327                } else {
1328                    return null;
1329                }
1330            } else {
1331                if (UCharacter.isUnicodeIdentifierPart(ch)) {
1332                    buf.appendCodePoint(ch);
1333                } else {
1334                    break;
1335                }
1336            }
1337            p += UTF16.getCharCount(ch);
1338        }
1339        pos[0] = p;
1340        return buf.toString();
1341    }
1342
1343    static final char DIGITS[] = {
1344        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
1345        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
1346        'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
1347        'U', 'V', 'W', 'X', 'Y', 'Z'
1348    };
1349
1350    /**
1351     * Append the digits of a positive integer to the given
1352     * <code>Appendable</code> in the given radix. This is
1353     * done recursively since it is easiest to generate the low-
1354     * order digit first, but it must be appended last.
1355     *
1356     * @param result is the <code>Appendable</code> to append to
1357     * @param n is the positive integer
1358     * @param radix is the radix, from 2 to 36 inclusive
1359     * @param minDigits is the minimum number of digits to append.
1360     */
1361    private static <T extends Appendable> void recursiveAppendNumber(T result, int n,
1362            int radix, int minDigits)
1363    {
1364        try {
1365            int digit = n % radix;
1366
1367            if (n >= radix || minDigits > 1) {
1368                recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
1369            }
1370            result.append(DIGITS[digit]);
1371        } catch (IOException e) {
1372            throw new IllegalIcuArgumentException(e);
1373        }
1374    }
1375
1376    /**
1377     * Append a number to the given Appendable in the given radix.
1378     * Standard digits '0'-'9' are used and letters 'A'-'Z' for
1379     * radices 11 through 36.
1380     * @param result the digits of the number are appended here
1381     * @param n the number to be converted to digits; may be negative.
1382     * If negative, a '-' is prepended to the digits.
1383     * @param radix a radix from 2 to 36 inclusive.
1384     * @param minDigits the minimum number of digits, not including
1385     * any '-', to produce.  Values less than 2 have no effect.  One
1386     * digit is always emitted regardless of this parameter.
1387     * @return a reference to result
1388     */
1389    public static <T extends Appendable> T appendNumber(T result, int n,
1390            int radix, int minDigits)
1391    {
1392        try {
1393            if (radix < 2 || radix > 36) {
1394                throw new IllegalArgumentException("Illegal radix " + radix);
1395            }
1396
1397
1398            int abs = n;
1399
1400            if (n < 0) {
1401                abs = -n;
1402                result.append("-");
1403            }
1404
1405            recursiveAppendNumber(result, abs, radix, minDigits);
1406
1407            return result;
1408        } catch (IOException e) {
1409            throw new IllegalIcuArgumentException(e);
1410        }
1411
1412    }
1413
1414    /**
1415     * Parse an unsigned 31-bit integer at the given offset.  Use
1416     * UCharacter.digit() to parse individual characters into digits.
1417     * @param text the text to be parsed
1418     * @param pos INPUT-OUTPUT parameter.  On entry, pos[0] is the
1419     * offset within text at which to start parsing; it should point
1420     * to a valid digit.  On exit, pos[0] is the offset after the last
1421     * parsed character.  If the parse failed, it will be unchanged on
1422     * exit.  Must be >= 0 on entry.
1423     * @param radix the radix in which to parse; must be >= 2 and <=
1424     * 36.
1425     * @return a non-negative parsed number, or -1 upon parse failure.
1426     * Parse fails if there are no digits, that is, if pos[0] does not
1427     * point to a valid digit on entry, or if the number to be parsed
1428     * does not fit into a 31-bit unsigned integer.
1429     */
1430    public static int parseNumber(String text, int[] pos, int radix) {
1431        // assert(pos[0] >= 0);
1432        // assert(radix >= 2);
1433        // assert(radix <= 36);
1434        int n = 0;
1435        int p = pos[0];
1436        while (p < text.length()) {
1437            int ch = Character.codePointAt(text, p);
1438            int d = UCharacter.digit(ch, radix);
1439            if (d < 0) {
1440                break;
1441            }
1442            n = radix*n + d;
1443            // ASSUME that when a 32-bit integer overflows it becomes
1444            // negative.  E.g., 214748364 * 10 + 8 => negative value.
1445            if (n < 0) {
1446                return -1;
1447            }
1448            ++p;
1449        }
1450        if (p == pos[0]) {
1451            return -1;
1452        }
1453        pos[0] = p;
1454        return n;
1455    }
1456
1457    /**
1458     * Return true if the character is NOT printable ASCII.  The tab,
1459     * newline and linefeed characters are considered unprintable.
1460     */
1461    public static boolean isUnprintable(int c) {
1462        //0x20 = 32 and 0x7E = 126
1463        return !(c >= 0x20 && c <= 0x7E);
1464    }
1465
1466    /**
1467     * Escape unprintable characters using <backslash>uxxxx notation
1468     * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
1469     * above.  If the character is printable ASCII, then do nothing
1470     * and return FALSE.  Otherwise, append the escaped notation and
1471     * return TRUE.
1472     */
1473    public static <T extends Appendable> boolean escapeUnprintable(T result, int c) {
1474        try {
1475            if (isUnprintable(c)) {
1476                result.append('\\');
1477                if ((c & ~0xFFFF) != 0) {
1478                    result.append('U');
1479                    result.append(DIGITS[0xF&(c>>28)]);
1480                    result.append(DIGITS[0xF&(c>>24)]);
1481                    result.append(DIGITS[0xF&(c>>20)]);
1482                    result.append(DIGITS[0xF&(c>>16)]);
1483                } else {
1484                    result.append('u');
1485                }
1486                result.append(DIGITS[0xF&(c>>12)]);
1487                result.append(DIGITS[0xF&(c>>8)]);
1488                result.append(DIGITS[0xF&(c>>4)]);
1489                result.append(DIGITS[0xF&c]);
1490                return true;
1491            }
1492            return false;
1493        } catch (IOException e) {
1494            throw new IllegalIcuArgumentException(e);
1495        }
1496    }
1497
1498    /**
1499     * Returns the index of the first character in a set, ignoring quoted text.
1500     * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
1501     * found by a search for "h".  Unlike String.indexOf(), this method searches
1502     * not for a single character, but for any character of the string
1503     * <code>setOfChars</code>.
1504     * @param text text to be searched
1505     * @param start the beginning index, inclusive; <code>0 <= start
1506     * <= limit</code>.
1507     * @param limit the ending index, exclusive; <code>start <= limit
1508     * <= text.length()</code>.
1509     * @param setOfChars string with one or more distinct characters
1510     * @return Offset of the first character in <code>setOfChars</code>
1511     * found, or -1 if not found.
1512     * @see String#indexOf
1513     */
1514    public static int quotedIndexOf(String text, int start, int limit,
1515            String setOfChars) {
1516        for (int i=start; i<limit; ++i) {
1517            char c = text.charAt(i);
1518            if (c == BACKSLASH) {
1519                ++i;
1520            } else if (c == APOSTROPHE) {
1521                while (++i < limit
1522                        && text.charAt(i) != APOSTROPHE) {}
1523            } else if (setOfChars.indexOf(c) >= 0) {
1524                return i;
1525            }
1526        }
1527        return -1;
1528    }
1529
1530    /**
1531     * Append a character to a rule that is being built up.  To flush
1532     * the quoteBuf to rule, make one final call with isLiteral == true.
1533     * If there is no final character, pass in (int)-1 as c.
1534     * @param rule the string to append the character to
1535     * @param c the character to append, or (int)-1 if none.
1536     * @param isLiteral if true, then the given character should not be
1537     * quoted or escaped.  Usually this means it is a syntactic element
1538     * such as > or $
1539     * @param escapeUnprintable if true, then unprintable characters
1540     * should be escaped using escapeUnprintable().  These escapes will
1541     * appear outside of quotes.
1542     * @param quoteBuf a buffer which is used to build up quoted
1543     * substrings.  The caller should initially supply an empty buffer,
1544     * and thereafter should not modify the buffer.  The buffer should be
1545     * cleared out by, at the end, calling this method with a literal
1546     * character (which may be -1).
1547     */
1548    public static void appendToRule(StringBuffer rule,
1549            int c,
1550            boolean isLiteral,
1551            boolean escapeUnprintable,
1552            StringBuffer quoteBuf) {
1553        // If we are escaping unprintables, then escape them outside
1554        // quotes.  \\u and \\U are not recognized within quotes.  The same
1555        // logic applies to literals, but literals are never escaped.
1556        if (isLiteral ||
1557                (escapeUnprintable && Utility.isUnprintable(c))) {
1558            if (quoteBuf.length() > 0) {
1559                // We prefer backslash APOSTROPHE to double APOSTROPHE
1560                // (more readable, less similar to ") so if there are
1561                // double APOSTROPHEs at the ends, we pull them outside
1562                // of the quote.
1563
1564                // If the first thing in the quoteBuf is APOSTROPHE
1565                // (doubled) then pull it out.
1566                while (quoteBuf.length() >= 2 &&
1567                        quoteBuf.charAt(0) == APOSTROPHE &&
1568                        quoteBuf.charAt(1) == APOSTROPHE) {
1569                    rule.append(BACKSLASH).append(APOSTROPHE);
1570                    quoteBuf.delete(0, 2);
1571                }
1572                // If the last thing in the quoteBuf is APOSTROPHE
1573                // (doubled) then remove and count it and add it after.
1574                int trailingCount = 0;
1575                while (quoteBuf.length() >= 2 &&
1576                        quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
1577                        quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
1578                    quoteBuf.setLength(quoteBuf.length()-2);
1579                    ++trailingCount;
1580                }
1581                if (quoteBuf.length() > 0) {
1582                    rule.append(APOSTROPHE);
1583                    rule.append(quoteBuf);
1584                    rule.append(APOSTROPHE);
1585                    quoteBuf.setLength(0);
1586                }
1587                while (trailingCount-- > 0) {
1588                    rule.append(BACKSLASH).append(APOSTROPHE);
1589                }
1590            }
1591            if (c != -1) {
1592                /* Since spaces are ignored during parsing, they are
1593                 * emitted only for readability.  We emit one here
1594                 * only if there isn't already one at the end of the
1595                 * rule.
1596                 */
1597                if (c == ' ') {
1598                    int len = rule.length();
1599                    if (len > 0 && rule.charAt(len-1) != ' ') {
1600                        rule.append(' ');
1601                    }
1602                } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) {
1603                    rule.appendCodePoint(c);
1604                }
1605            }
1606        }
1607
1608        // Escape ' and '\' and don't begin a quote just for them
1609        else if (quoteBuf.length() == 0 &&
1610                (c == APOSTROPHE || c == BACKSLASH)) {
1611            rule.append(BACKSLASH).append((char)c);
1612        }
1613
1614        // Specials (printable ascii that isn't [0-9a-zA-Z]) and
1615        // whitespace need quoting.  Also append stuff to quotes if we are
1616        // building up a quoted substring already.
1617        else if (quoteBuf.length() > 0 ||
1618                (c >= 0x0021 && c <= 0x007E &&
1619                        !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
1620                                (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
1621                                (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
1622                                PatternProps.isWhiteSpace(c)) {
1623            quoteBuf.appendCodePoint(c);
1624            // Double ' within a quote
1625            if (c == APOSTROPHE) {
1626                quoteBuf.append((char)c);
1627            }
1628        }
1629
1630        // Otherwise just append
1631        else {
1632            rule.appendCodePoint(c);
1633        }
1634    }
1635
1636    /**
1637     * Append the given string to the rule.  Calls the single-character
1638     * version of appendToRule for each character.
1639     */
1640    public static void appendToRule(StringBuffer rule,
1641            String text,
1642            boolean isLiteral,
1643            boolean escapeUnprintable,
1644            StringBuffer quoteBuf) {
1645        for (int i=0; i<text.length(); ++i) {
1646            // Okay to process in 16-bit code units here
1647            appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
1648        }
1649    }
1650
1651    /**
1652     * Given a matcher reference, which may be null, append its
1653     * pattern as a literal to the given rule.
1654     */
1655    public static void appendToRule(StringBuffer rule,
1656            UnicodeMatcher matcher,
1657            boolean escapeUnprintable,
1658            StringBuffer quoteBuf) {
1659        if (matcher != null) {
1660            appendToRule(rule, matcher.toPattern(escapeUnprintable),
1661                    true, escapeUnprintable, quoteBuf);
1662        }
1663    }
1664
1665    /**
1666     * Compares 2 unsigned integers
1667     * @param source 32 bit unsigned integer
1668     * @param target 32 bit unsigned integer
1669     * @return 0 if equals, 1 if source is greater than target and -1
1670     *         otherwise
1671     */
1672    public static final int compareUnsigned(int source, int target)
1673    {
1674        source += MAGIC_UNSIGNED;
1675        target += MAGIC_UNSIGNED;
1676        if (source < target) {
1677            return -1;
1678        }
1679        else if (source > target) {
1680            return 1;
1681        }
1682        return 0;
1683    }
1684
1685    /**
1686     * Find the highest bit in a positive integer. This is done
1687     * by doing a binary search through the bits.
1688     *
1689     * @param n is the integer
1690     *
1691     * @return the bit number of the highest bit, with 0 being
1692     * the low order bit, or -1 if <code>n</code> is not positive
1693     */
1694    public static final byte highBit(int n)
1695    {
1696        if (n <= 0) {
1697            return -1;
1698        }
1699
1700        byte bit = 0;
1701
1702        if (n >= 1 << 16) {
1703            n >>= 16;
1704        bit += 16;
1705        }
1706
1707        if (n >= 1 << 8) {
1708            n >>= 8;
1709        bit += 8;
1710        }
1711
1712        if (n >= 1 << 4) {
1713            n >>= 4;
1714        bit += 4;
1715        }
1716
1717        if (n >= 1 << 2) {
1718            n >>= 2;
1719        bit += 2;
1720        }
1721
1722        if (n >= 1 << 1) {
1723            n >>= 1;
1724        bit += 1;
1725        }
1726
1727        return bit;
1728    }
1729    /**
1730     * Utility method to take a int[] containing codepoints and return
1731     * a string representation with code units.
1732     */
1733    public static String valueOf(int[]source){
1734        // TODO: Investigate why this method is not on UTF16 class
1735        StringBuilder result = new StringBuilder(source.length);
1736        for(int i=0; i<source.length; i++){
1737            result.appendCodePoint(source[i]);
1738        }
1739        return result.toString();
1740    }
1741
1742
1743    /**
1744     * Utility to duplicate a string count times
1745     * @param s String to be duplicated.
1746     * @param count Number of times to duplicate a string.
1747     */
1748    public static String repeat(String s, int count) {
1749        if (count <= 0) return "";
1750        if (count == 1) return s;
1751        StringBuilder result = new StringBuilder();
1752        for (int i = 0; i < count; ++i) {
1753            result.append(s);
1754        }
1755        return result.toString();
1756    }
1757
1758    public static String[] splitString(String src, String target) {
1759        return src.split("\\Q" + target + "\\E");
1760    }
1761
1762    /**
1763     * Split the string at runs of ascii whitespace characters.
1764     */
1765    public static String[] splitWhitespace(String src) {
1766        return src.split("\\s+");
1767    }
1768
1769    /**
1770     * Parse a list of hex numbers and return a string
1771     * @param string String of hex numbers.
1772     * @param minLength Minimal length.
1773     * @param separator Separator.
1774     * @return A string from hex numbers.
1775     */
1776    public static String fromHex(String string, int minLength, String separator) {
1777        return fromHex(string, minLength, Pattern.compile(separator != null ? separator : "\\s+"));
1778    }
1779
1780    /**
1781     * Parse a list of hex numbers and return a string
1782     * @param string String of hex numbers.
1783     * @param minLength Minimal length.
1784     * @param separator Separator.
1785     * @return A string from hex numbers.
1786     */
1787    public static String fromHex(String string, int minLength, Pattern separator) {
1788        StringBuilder buffer = new StringBuilder();
1789        String[] parts = separator.split(string);
1790        for (String part : parts) {
1791            if (part.length() < minLength) {
1792                throw new IllegalArgumentException("code point too short: " + part);
1793            }
1794            int cp = Integer.parseInt(part, 16);
1795            buffer.appendCodePoint(cp);
1796        }
1797        return buffer.toString();
1798    }
1799}
1800