1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 *******************************************************************************
6 * Copyright (C) 1996-2015, International Business Machines Corporation and    *
7 * others. All Rights Reserved.                                                *
8 *******************************************************************************
9 */
10package android.icu.impl;
11
12import java.io.IOException;
13import java.util.ArrayList;
14import java.util.Locale;
15import java.util.regex.Pattern;
16
17import android.icu.lang.UCharacter;
18import android.icu.text.Replaceable;
19import android.icu.text.UTF16;
20import android.icu.text.UnicodeMatcher;
21
22/**
23 * @hide Only a subset of ICU is exposed in Android
24 */
25public final class Utility {
26
// The single-quote (apostrophe) character.
private static final char APOSTROPHE = '\'';
// The backslash character.
private static final char BACKSLASH  = '\\';
// An int with only the top (sign) bit set.
// NOTE(review): not referenced in the visible part of this file; its exact
// use should be confirmed at the call sites.
private static final int MAGIC_UNSIGNED = 0x80000000;
30
31    /**
32     * Convenience utility to compare two Object[]s.
33     * Ought to be in System
34     */
35    public final static boolean arrayEquals(Object[] source, Object target) {
36        if (source == null) return (target == null);
37        if (!(target instanceof Object[])) return false;
38        Object[] targ = (Object[]) target;
39        return (source.length == targ.length
40                && arrayRegionMatches(source, 0, targ, 0, source.length));
41    }
42
43    /**
44     * Convenience utility to compare two int[]s
45     * Ought to be in System
46     */
47    public final static boolean arrayEquals(int[] source, Object target) {
48        if (source == null) return (target == null);
49        if (!(target instanceof int[])) return false;
50        int[] targ = (int[]) target;
51        return (source.length == targ.length
52                && arrayRegionMatches(source, 0, targ, 0, source.length));
53    }
54
55    /**
56     * Convenience utility to compare two double[]s
57     * Ought to be in System
58     */
59    public final static boolean arrayEquals(double[] source, Object target) {
60        if (source == null) return (target == null);
61        if (!(target instanceof double[])) return false;
62        double[] targ = (double[]) target;
63        return (source.length == targ.length
64                && arrayRegionMatches(source, 0, targ, 0, source.length));
65    }
66    public final static boolean arrayEquals(byte[] source, Object target) {
67        if (source == null) return (target == null);
68        if (!(target instanceof byte[])) return false;
69        byte[] targ = (byte[]) target;
70        return (source.length == targ.length
71                && arrayRegionMatches(source, 0, targ, 0, source.length));
72    }
73
/**
 * Convenience utility to compare two Objects, either of which may be an
 * Object[], int[], double[] or byte[]; anything else is compared with equals().
 * Ought to be in System
 */
78    public final static boolean arrayEquals(Object source, Object target) {
79        if (source == null) return (target == null);
80        // for some reason, the correct arrayEquals is not being called
81        // so do it by hand for now.
82        if (source instanceof Object[])
83            return(arrayEquals((Object[]) source,target));
84        if (source instanceof int[])
85            return(arrayEquals((int[]) source,target));
86        if (source instanceof double[])
87            return(arrayEquals((double[]) source, target));
88        if (source instanceof byte[])
89            return(arrayEquals((byte[]) source,target));
90        return source.equals(target);
91    }
92
93    /**
94     * Convenience utility to compare two Object[]s
95     * Ought to be in System.
96     * @param len the length to compare.
97     * The start indices and start+len must be valid.
98     */
99    public final static boolean arrayRegionMatches(Object[] source, int sourceStart,
100            Object[] target, int targetStart,
101            int len)
102    {
103        int sourceEnd = sourceStart + len;
104        int delta = targetStart - sourceStart;
105        for (int i = sourceStart; i < sourceEnd; i++) {
106            if (!arrayEquals(source[i],target[i + delta]))
107                return false;
108        }
109        return true;
110    }
111
/**
 * Convenience utility to compare regions of two char[]s.
 * Ought to be in System.
 * @param len the length to compare.
 * The start indices and start+len must be valid.
 */
118    public final static boolean arrayRegionMatches(char[] source, int sourceStart,
119            char[] target, int targetStart,
120            int len)
121    {
122        int sourceEnd = sourceStart + len;
123        int delta = targetStart - sourceStart;
124        for (int i = sourceStart; i < sourceEnd; i++) {
125            if (source[i]!=target[i + delta])
126                return false;
127        }
128        return true;
129    }
130
131    /**
132     * Convenience utility to compare two int[]s.
133     * @param len the length to compare.
134     * The start indices and start+len must be valid.
135     * Ought to be in System
136     */
137    public final static boolean arrayRegionMatches(int[] source, int sourceStart,
138            int[] target, int targetStart,
139            int len)
140    {
141        int sourceEnd = sourceStart + len;
142        int delta = targetStart - sourceStart;
143        for (int i = sourceStart; i < sourceEnd; i++) {
144            if (source[i] != target[i + delta])
145                return false;
146        }
147        return true;
148    }
149
150    /**
151     * Convenience utility to compare two arrays of doubles.
152     * @param len the length to compare.
153     * The start indices and start+len must be valid.
154     * Ought to be in System
155     */
156    public final static boolean arrayRegionMatches(double[] source, int sourceStart,
157            double[] target, int targetStart,
158            int len)
159    {
160        int sourceEnd = sourceStart + len;
161        int delta = targetStart - sourceStart;
162        for (int i = sourceStart; i < sourceEnd; i++) {
163            if (source[i] != target[i + delta])
164                return false;
165        }
166        return true;
167    }
168    public final static boolean arrayRegionMatches(byte[] source, int sourceStart,
169            byte[] target, int targetStart, int len){
170        int sourceEnd = sourceStart + len;
171        int delta = targetStart - sourceStart;
172        for (int i = sourceStart; i < sourceEnd; i++) {
173            if (source[i] != target[i + delta])
174                return false;
175        }
176        return true;
177    }
178
179    /**
180     * Trivial reference equality.
181     * This method should help document that we really want == not equals(),
182     * and to have a single place to suppress warnings from static analysis tools.
183     */
184    public static final boolean sameObjects(Object a, Object b) {
185        return a == b;
186    }
187
188    /**
189     * Convenience utility. Does null checks on objects, then calls equals.
190     */
191    public final static boolean objectEquals(Object a, Object b) {
192        return a == null ?
193                b == null ? true : false :
194                    b == null ? false : a.equals(b);
195    }
196
197    /**
198     * Convenience utility. Does null checks on objects, then calls compare.
199     */
200    public static <T extends Comparable<T>> int checkCompare(T a, T b) {
201        return a == null ?
202                b == null ? 0 : -1 :
203                    b == null ? 1 : a.compareTo(b);
204      }
205
206    /**
207     * Convenience utility. Does null checks on object, then calls hashCode.
208     */
209    public static int checkHash(Object a) {
210        return a == null ? 0 : a.hashCode();
211      }
212
213    /**
214     * The ESCAPE character is used during run-length encoding.  It signals
215     * a run of identical chars.
216     */
217    private static final char ESCAPE = '\uA5A5';
218
219    /**
220     * The ESCAPE_BYTE character is used during run-length encoding.  It signals
221     * a run of identical bytes.
222     */
223    static final byte ESCAPE_BYTE = (byte)0xA5;
224
225    /**
226     * Construct a string representing an int array.  Use run-length encoding.
227     * A character represents itself, unless it is the ESCAPE character.  Then
228     * the following notations are possible:
229     *   ESCAPE ESCAPE   ESCAPE literal
230     *   ESCAPE n c      n instances of character c
231     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
232     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
233     * If we encounter a run where n == ESCAPE, we represent this as:
234     *   c ESCAPE n-1 c
235     * The ESCAPE value is chosen so as not to collide with commonly
236     * seen values.
237     */
238    static public final String arrayToRLEString(int[] a) {
239        StringBuilder buffer = new StringBuilder();
240
241        appendInt(buffer, a.length);
242        int runValue = a[0];
243        int runLength = 1;
244        for (int i=1; i<a.length; ++i) {
245            int s = a[i];
246            if (s == runValue && runLength < 0xFFFF) {
247                ++runLength;
248            } else {
249                encodeRun(buffer, runValue, runLength);
250                runValue = s;
251                runLength = 1;
252            }
253        }
254        encodeRun(buffer, runValue, runLength);
255        return buffer.toString();
256    }
257
258    /**
259     * Construct a string representing a short array.  Use run-length encoding.
260     * A character represents itself, unless it is the ESCAPE character.  Then
261     * the following notations are possible:
262     *   ESCAPE ESCAPE   ESCAPE literal
263     *   ESCAPE n c      n instances of character c
264     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
265     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
266     * If we encounter a run where n == ESCAPE, we represent this as:
267     *   c ESCAPE n-1 c
268     * The ESCAPE value is chosen so as not to collide with commonly
269     * seen values.
270     */
271    static public final String arrayToRLEString(short[] a) {
272        StringBuilder buffer = new StringBuilder();
273        // for (int i=0; i<a.length; ++i) buffer.append((char) a[i]);
274        buffer.append((char) (a.length >> 16));
275        buffer.append((char) a.length);
276        short runValue = a[0];
277        int runLength = 1;
278        for (int i=1; i<a.length; ++i) {
279            short s = a[i];
280            if (s == runValue && runLength < 0xFFFF) ++runLength;
281            else {
282                encodeRun(buffer, runValue, runLength);
283                runValue = s;
284                runLength = 1;
285            }
286        }
287        encodeRun(buffer, runValue, runLength);
288        return buffer.toString();
289    }
290
291    /**
292     * Construct a string representing a char array.  Use run-length encoding.
293     * A character represents itself, unless it is the ESCAPE character.  Then
294     * the following notations are possible:
295     *   ESCAPE ESCAPE   ESCAPE literal
296     *   ESCAPE n c      n instances of character c
297     * Since an encoded run occupies 3 characters, we only encode runs of 4 or
298     * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
299     * If we encounter a run where n == ESCAPE, we represent this as:
300     *   c ESCAPE n-1 c
301     * The ESCAPE value is chosen so as not to collide with commonly
302     * seen values.
303     */
304    static public final String arrayToRLEString(char[] a) {
305        StringBuilder buffer = new StringBuilder();
306        buffer.append((char) (a.length >> 16));
307        buffer.append((char) a.length);
308        char runValue = a[0];
309        int runLength = 1;
310        for (int i=1; i<a.length; ++i) {
311            char s = a[i];
312            if (s == runValue && runLength < 0xFFFF) ++runLength;
313            else {
314                encodeRun(buffer, (short)runValue, runLength);
315                runValue = s;
316                runLength = 1;
317            }
318        }
319        encodeRun(buffer, (short)runValue, runLength);
320        return buffer.toString();
321    }
322
323    /**
324     * Construct a string representing a byte array.  Use run-length encoding.
325     * Two bytes are packed into a single char, with a single extra zero byte at
326     * the end if needed.  A byte represents itself, unless it is the
327     * ESCAPE_BYTE.  Then the following notations are possible:
328     *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
329     *   ESCAPE_BYTE n b           n instances of byte b
330     * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
331     * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
332     * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
333     *   b ESCAPE_BYTE n-1 b
334     * The ESCAPE_BYTE value is chosen so as not to collide with commonly
335     * seen values.
336     */
337    static public final String arrayToRLEString(byte[] a) {
338        StringBuilder buffer = new StringBuilder();
339        buffer.append((char) (a.length >> 16));
340        buffer.append((char) a.length);
341        byte runValue = a[0];
342        int runLength = 1;
343        byte[] state = new byte[2];
344        for (int i=1; i<a.length; ++i) {
345            byte b = a[i];
346            if (b == runValue && runLength < 0xFF) ++runLength;
347            else {
348                encodeRun(buffer, runValue, runLength, state);
349                runValue = b;
350                runLength = 1;
351            }
352        }
353        encodeRun(buffer, runValue, runLength, state);
354
355        // We must save the final byte, if there is one, by padding
356        // an extra zero.
357        if (state[0] != 0) appendEncodedByte(buffer, (byte)0, state);
358
359        return buffer.toString();
360    }
361
362    /**
363     * Encode a run, possibly a degenerate run (of < 4 values).
364     * @param length The length of the run; must be > 0 && <= 0xFFFF.
365     */
366    private static final <T extends Appendable> void encodeRun(T buffer, int value, int length) {
367        if (length < 4) {
368            for (int j=0; j<length; ++j) {
369                if (value == ESCAPE) {
370                    appendInt(buffer, value);
371                }
372                appendInt(buffer, value);
373            }
374        }
375        else {
376            if (length == ESCAPE) {
377                if (value == ESCAPE) {
378                    appendInt(buffer, ESCAPE);
379                }
380                appendInt(buffer, value);
381                --length;
382            }
383            appendInt(buffer, ESCAPE);
384            appendInt(buffer, length);
385            appendInt(buffer, value); // Don't need to escape this value
386        }
387    }
388
389    private static final <T extends Appendable> void appendInt(T buffer, int value) {
390        try {
391            buffer.append((char)(value >>> 16));
392            buffer.append((char)(value & 0xFFFF));
393        } catch (IOException e) {
394            throw new IllegalIcuArgumentException(e);
395        }
396    }
397
398    /**
399     * Encode a run, possibly a degenerate run (of < 4 values).
400     * @param length The length of the run; must be > 0 && <= 0xFFFF.
401     */
402    private static final <T extends Appendable> void encodeRun(T buffer, short value, int length) {
403        try {
404            char valueChar = (char) value;
405            if (length < 4) {
406                for (int j=0; j<length; ++j) {
407                    if (valueChar == ESCAPE) {
408                        buffer.append(ESCAPE);
409                    }
410                    buffer.append(valueChar);
411                }
412            }
413            else {
414                if (length == ESCAPE) {
415                    if (valueChar == ESCAPE) {
416                        buffer.append(ESCAPE);
417                    }
418                    buffer.append(valueChar);
419                    --length;
420                }
421                buffer.append(ESCAPE);
422                buffer.append((char) length);
423                buffer.append(valueChar); // Don't need to escape this value
424            }
425        } catch (IOException e) {
426            throw new IllegalIcuArgumentException(e);
427        }
428    }
429
430    /**
431     * Encode a run, possibly a degenerate run (of < 4 values).
432     * @param length The length of the run; must be > 0 && <= 0xFF.
433     */
434    private static final <T extends Appendable> void encodeRun(T buffer, byte value, int length,
435            byte[] state) {
436        if (length < 4) {
437            for (int j=0; j<length; ++j) {
438                if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
439                appendEncodedByte(buffer, value, state);
440            }
441        }
442        else {
443            if ((byte)length == ESCAPE_BYTE) {
444                if (value == ESCAPE_BYTE) appendEncodedByte(buffer, ESCAPE_BYTE, state);
445                appendEncodedByte(buffer, value, state);
446                --length;
447            }
448            appendEncodedByte(buffer, ESCAPE_BYTE, state);
449            appendEncodedByte(buffer, (byte)length, state);
450            appendEncodedByte(buffer, value, state); // Don't need to escape this value
451        }
452    }
453
454    /**
455     * Append a byte to the given Appendable, packing two bytes into each
456     * character.  The state parameter maintains intermediary data between
457     * calls.
458     * @param state A two-element array, with state[0] == 0 if this is the
459     * first byte of a pair, or state[0] != 0 if this is the second byte
460     * of a pair, in which case state[1] is the first byte.
461     */
462    private static final <T extends Appendable> void appendEncodedByte(T buffer, byte value,
463            byte[] state) {
464        try {
465            if (state[0] != 0) {
466                char c = (char) ((state[1] << 8) | ((value) & 0xFF));
467                buffer.append(c);
468                state[0] = 0;
469            }
470            else {
471                state[0] = 1;
472                state[1] = value;
473            }
474        } catch (IOException e) {
475            throw new IllegalIcuArgumentException(e);
476        }
477    }
478
479    /**
480     * Construct an array of ints from a run-length encoded string.
481     */
482    static public final int[] RLEStringToIntArray(String s) {
483        int length = getInt(s, 0);
484        int[] array = new int[length];
485        int ai = 0, i = 1;
486
487        int maxI = s.length() / 2;
488        while (ai < length && i < maxI) {
489            int c = getInt(s, i++);
490
491            if (c == ESCAPE) {
492                c = getInt(s, i++);
493                if (c == ESCAPE) {
494                    array[ai++] = c;
495                } else {
496                    int runLength = c;
497                    int runValue = getInt(s, i++);
498                    for (int j=0; j<runLength; ++j) {
499                        array[ai++] = runValue;
500                    }
501                }
502            }
503            else {
504                array[ai++] = c;
505            }
506        }
507
508        if (ai != length || i != maxI) {
509            throw new IllegalStateException("Bad run-length encoded int array");
510        }
511
512        return array;
513    }
514    static final int getInt(String s, int i) {
515        return ((s.charAt(2*i)) << 16) | s.charAt(2*i+1);
516    }
517
518    /**
519     * Construct an array of shorts from a run-length encoded string.
520     */
521    static public final short[] RLEStringToShortArray(String s) {
522        int length = ((s.charAt(0)) << 16) | (s.charAt(1));
523        short[] array = new short[length];
524        int ai = 0;
525        for (int i=2; i<s.length(); ++i) {
526            char c = s.charAt(i);
527            if (c == ESCAPE) {
528                c = s.charAt(++i);
529                if (c == ESCAPE) {
530                    array[ai++] = (short) c;
531                } else {
532                    int runLength = c;
533                    short runValue = (short) s.charAt(++i);
534                    for (int j=0; j<runLength; ++j) array[ai++] = runValue;
535                }
536            }
537            else {
538                array[ai++] = (short) c;
539            }
540        }
541
542        if (ai != length)
543            throw new IllegalStateException("Bad run-length encoded short array");
544
545        return array;
546    }
547
/**
 * Construct an array of chars from a run-length encoded string.
 */
551    static public final char[] RLEStringToCharArray(String s) {
552        int length = ((s.charAt(0)) << 16) | (s.charAt(1));
553        char[] array = new char[length];
554        int ai = 0;
555        for (int i=2; i<s.length(); ++i) {
556            char c = s.charAt(i);
557            if (c == ESCAPE) {
558                c = s.charAt(++i);
559                if (c == ESCAPE) {
560                    array[ai++] = c;
561                } else {
562                    int runLength = c;
563                    char runValue = s.charAt(++i);
564                    for (int j=0; j<runLength; ++j) array[ai++] = runValue;
565                }
566            }
567            else {
568                array[ai++] = c;
569            }
570        }
571
572        if (ai != length)
573            throw new IllegalStateException("Bad run-length encoded short array");
574
575        return array;
576    }
577
578    /**
579     * Construct an array of bytes from a run-length encoded string.
580     */
581    static public final byte[] RLEStringToByteArray(String s) {
582        int length = ((s.charAt(0)) << 16) | (s.charAt(1));
583        byte[] array = new byte[length];
584        boolean nextChar = true;
585        char c = 0;
586        int node = 0;
587        int runLength = 0;
588        int i = 2;
589        for (int ai=0; ai<length; ) {
590            // This part of the loop places the next byte into the local
591            // variable 'b' each time through the loop.  It keeps the
592            // current character in 'c' and uses the boolean 'nextChar'
593            // to see if we've taken both bytes out of 'c' yet.
594            byte b;
595            if (nextChar) {
596                c = s.charAt(i++);
597                b = (byte) (c >> 8);
598                nextChar = false;
599            }
600            else {
601                b = (byte) (c & 0xFF);
602                nextChar = true;
603            }
604
605            // This part of the loop is a tiny state machine which handles
606            // the parsing of the run-length encoding.  This would be simpler
607            // if we could look ahead, but we can't, so we use 'node' to
608            // move between three nodes in the state machine.
609            switch (node) {
610            case 0:
611                // Normal idle node
612                if (b == ESCAPE_BYTE) {
613                    node = 1;
614                }
615                else {
616                    array[ai++] = b;
617                }
618                break;
619            case 1:
620                // We have seen one ESCAPE_BYTE; we expect either a second
621                // one, or a run length and value.
622                if (b == ESCAPE_BYTE) {
623                    array[ai++] = ESCAPE_BYTE;
624                    node = 0;
625                }
626                else {
627                    runLength = b;
628                    // Interpret signed byte as unsigned
629                    if (runLength < 0) runLength += 0x100;
630                    node = 2;
631                }
632                break;
633            case 2:
634                // We have seen an ESCAPE_BYTE and length byte.  We interpret
635                // the next byte as the value to be repeated.
636                for (int j=0; j<runLength; ++j) array[ai++] = b;
637                node = 0;
638                break;
639            }
640        }
641
642        if (node != 0)
643            throw new IllegalStateException("Bad run-length encoded byte array");
644
645        if (i != s.length())
646            throw new IllegalStateException("Excess data in RLE byte array string");
647
648        return array;
649    }
650
// The "line.separator" system property, read once when this class is
// initialized. formatForSource appends it after the '+' that joins chunks.
// NOTE(review): the field is public and not final, so other code can
// reassign it; confirm whether that is intended.
static public String LINE_SEPARATOR = System.getProperty("line.separator");
652
653    /**
654     * Format a String for representation in a source file.  This includes
655     * breaking it into lines and escaping characters using octal notation
656     * when necessary (control characters and double quotes).
657     */
658    static public final String formatForSource(String s) {
659        StringBuilder buffer = new StringBuilder();
660        for (int i=0; i<s.length();) {
661            if (i > 0) buffer.append('+').append(LINE_SEPARATOR);
662            buffer.append("        \"");
663            int count = 11;
664            while (i<s.length() && count<80) {
665                char c = s.charAt(i++);
666                if (c < '\u0020' || c == '"' || c == '\\') {
667                    if (c == '\n') {
668                        buffer.append("\\n");
669                        count += 2;
670                    } else if (c == '\t') {
671                        buffer.append("\\t");
672                        count += 2;
673                    } else if (c == '\r') {
674                        buffer.append("\\r");
675                        count += 2;
676                    } else {
677                        // Represent control characters, backslash and double quote
678                        // using octal notation; otherwise the string we form
679                        // won't compile, since Unicode escape sequences are
680                        // processed before tokenization.
681                        buffer.append('\\');
682                        buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
683                        buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
684                        buffer.append(HEX_DIGIT[(c & 0007)]);
685                        count += 4;
686                    }
687                }
688                else if (c <= '\u007E') {
689                    buffer.append(c);
690                    count += 1;
691                }
692                else {
693                    buffer.append("\\u");
694                    buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
695                    buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
696                    buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
697                    buffer.append(HEX_DIGIT[(c & 0x000F)]);
698                    count += 6;
699                }
700            }
701            buffer.append('"');
702        }
703        return buffer.toString();
704    }
705
// Upper-case hexadecimal digits indexed by value (0-15). The first eight
// entries are also used as octal digits by the source formatters.
static final char[] HEX_DIGIT = {'0','1','2','3','4','5','6','7',
    '8','9','A','B','C','D','E','F'};
708
709    /**
710     * Format a String for representation in a source file.  Like
711     * formatForSource but does not do line breaking.
712     */
713    static public final String format1ForSource(String s) {
714        StringBuilder buffer = new StringBuilder();
715        buffer.append("\"");
716        for (int i=0; i<s.length();) {
717            char c = s.charAt(i++);
718            if (c < '\u0020' || c == '"' || c == '\\') {
719                if (c == '\n') {
720                    buffer.append("\\n");
721                } else if (c == '\t') {
722                    buffer.append("\\t");
723                } else if (c == '\r') {
724                    buffer.append("\\r");
725                } else {
726                    // Represent control characters, backslash and double quote
727                    // using octal notation; otherwise the string we form
728                    // won't compile, since Unicode escape sequences are
729                    // processed before tokenization.
730                    buffer.append('\\');
731                    buffer.append(HEX_DIGIT[(c & 0700) >> 6]); // HEX_DIGIT works for octal
732                    buffer.append(HEX_DIGIT[(c & 0070) >> 3]);
733                    buffer.append(HEX_DIGIT[(c & 0007)]);
734                }
735            }
736            else if (c <= '\u007E') {
737                buffer.append(c);
738            }
739            else {
740                buffer.append("\\u");
741                buffer.append(HEX_DIGIT[(c & 0xF000) >> 12]);
742                buffer.append(HEX_DIGIT[(c & 0x0F00) >> 8]);
743                buffer.append(HEX_DIGIT[(c & 0x00F0) >> 4]);
744                buffer.append(HEX_DIGIT[(c & 0x000F)]);
745            }
746        }
747        buffer.append('"');
748        return buffer.toString();
749    }
750
751    /**
752     * Convert characters outside the range U+0020 to U+007F to
753     * Unicode escapes, and convert backslash to a double backslash.
754     */
755    public static final String escape(String s) {
756        StringBuilder buf = new StringBuilder();
757        for (int i=0; i<s.length(); ) {
758            int c = Character.codePointAt(s, i);
759            i += UTF16.getCharCount(c);
760            if (c >= ' ' && c <= 0x007F) {
761                if (c == '\\') {
762                    buf.append("\\\\"); // That is, "\\"
763                } else {
764                    buf.append((char)c);
765                }
766            } else {
767                boolean four = c <= 0xFFFF;
768                buf.append(four ? "\\u" : "\\U");
769                buf.append(hex(c, four ? 4 : 8));
770            }
771        }
772        return buf.toString();
773    }
774
775    /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
// Flat table of pairs: each even index holds the letter that follows the
// backslash, and the next index holds the character value it stands for
// (for example 'n' maps to 0x0a). The commented-out rows are characters
// that would map to themselves.
// NOTE(review): the lookup code is outside the visible part of this file;
// presumably the ascending order lets the search stop early. Confirm at
// the use site.
static private final char[] UNESCAPE_MAP = {
    /*"   0x22, 0x22 */
    /*'   0x27, 0x27 */
    /*?   0x3F, 0x3F */
    /*\   0x5C, 0x5C */
    /*a*/ 0x61, 0x07,
    /*b*/ 0x62, 0x08,
    /*e*/ 0x65, 0x1b,
    /*f*/ 0x66, 0x0c,
    /*n*/ 0x6E, 0x0a,
    /*r*/ 0x72, 0x0d,
    /*t*/ 0x74, 0x09,
    /*v*/ 0x76, 0x0b
};
790
791    /**
792     * Convert an escape to a 32-bit code point value.  We attempt
793     * to parallel the icu4c unescapeAt() function.
794     * @param offset16 an array containing offset to the character
795     * <em>after</em> the backslash.  Upon return offset16[0] will
796     * be updated to point after the escape sequence.
797     * @return character value from 0 to 10FFFF, or -1 on error.
798     */
799    public static int unescapeAt(String s, int[] offset16) {
800        int c;
801        int result = 0;
802        int n = 0;
803        int minDig = 0;
804        int maxDig = 0;
805        int bitsPerDigit = 4;
806        int dig;
807        int i;
808        boolean braces = false;
809
810        /* Check that offset is in range */
811        int offset = offset16[0];
812        int length = s.length();
813        if (offset < 0 || offset >= length) {
814            return -1;
815        }
816
817        /* Fetch first UChar after '\\' */
818        c = Character.codePointAt(s, offset);
819        offset += UTF16.getCharCount(c);
820
821        /* Convert hexadecimal and octal escapes */
822        switch (c) {
823        case 'u':
824            minDig = maxDig = 4;
825            break;
826        case 'U':
827            minDig = maxDig = 8;
828            break;
829        case 'x':
830            minDig = 1;
831            if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
832                ++offset;
833                braces = true;
834                maxDig = 8;
835            } else {
836                maxDig = 2;
837            }
838            break;
839        default:
840            dig = UCharacter.digit(c, 8);
841            if (dig >= 0) {
842                minDig = 1;
843                maxDig = 3;
844                n = 1; /* Already have first octal digit */
845                bitsPerDigit = 3;
846                result = dig;
847            }
848            break;
849        }
850        if (minDig != 0) {
851            while (offset < length && n < maxDig) {
852                c = UTF16.charAt(s, offset);
853                dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
854                if (dig < 0) {
855                    break;
856                }
857                result = (result << bitsPerDigit) | dig;
858                offset += UTF16.getCharCount(c);
859                ++n;
860            }
861            if (n < minDig) {
862                return -1;
863            }
864            if (braces) {
865                if (c != 0x7D /*}*/) {
866                    return -1;
867                }
868                ++offset;
869            }
870            if (result < 0 || result >= 0x110000) {
871                return -1;
872            }
873            // If an escape sequence specifies a lead surrogate, see
874            // if there is a trail surrogate after it, either as an
875            // escape or as a literal.  If so, join them up into a
876            // supplementary.
877            if (offset < length &&
878                    UTF16.isLeadSurrogate((char) result)) {
879                int ahead = offset+1;
880                c = s.charAt(offset); // [sic] get 16-bit code unit
881                if (c == '\\' && ahead < length) {
882                    int o[] = new int[] { ahead };
883                    c = unescapeAt(s, o);
884                    ahead = o[0];
885                }
886                if (UTF16.isTrailSurrogate((char) c)) {
887                    offset = ahead;
888                    result = Character.toCodePoint((char) result, (char) c);
889                }
890            }
891            offset16[0] = offset;
892            return result;
893        }
894
895        /* Convert C-style escapes in table */
896        for (i=0; i<UNESCAPE_MAP.length; i+=2) {
897            if (c == UNESCAPE_MAP[i]) {
898                offset16[0] = offset;
899                return UNESCAPE_MAP[i+1];
900            } else if (c < UNESCAPE_MAP[i]) {
901                break;
902            }
903        }
904
905        /* Map \cX to control-X: X & 0x1F */
906        if (c == 'c' && offset < length) {
907            c = UTF16.charAt(s, offset);
908            offset16[0] = offset + UTF16.getCharCount(c);
909            return 0x1F & c;
910        }
911
912        /* If no special forms are recognized, then consider
913         * the backslash to generically escape the next character. */
914        offset16[0] = offset;
915        return c;
916    }
917
918    /**
919     * Convert all escapes in a given string using unescapeAt().
920     * @exception IllegalArgumentException if an invalid escape is
921     * seen.
922     */
923    public static String unescape(String s) {
924        StringBuilder buf = new StringBuilder();
925        int[] pos = new int[1];
926        for (int i=0; i<s.length(); ) {
927            char c = s.charAt(i++);
928            if (c == '\\') {
929                pos[0] = i;
930                int e = unescapeAt(s, pos);
931                if (e < 0) {
932                    throw new IllegalArgumentException("Invalid escape sequence " +
933                            s.substring(i-1, Math.min(i+8, s.length())));
934                }
935                buf.appendCodePoint(e);
936                i = pos[0];
937            } else {
938                buf.append(c);
939            }
940        }
941        return buf.toString();
942    }
943
944    /**
945     * Convert all escapes in a given string using unescapeAt().
946     * Leave invalid escape sequences unchanged.
947     */
948    public static String unescapeLeniently(String s) {
949        StringBuilder buf = new StringBuilder();
950        int[] pos = new int[1];
951        for (int i=0; i<s.length(); ) {
952            char c = s.charAt(i++);
953            if (c == '\\') {
954                pos[0] = i;
955                int e = unescapeAt(s, pos);
956                if (e < 0) {
957                    buf.append(c);
958                } else {
959                    buf.appendCodePoint(e);
960                    i = pos[0];
961                }
962            } else {
963                buf.append(c);
964            }
965        }
966        return buf.toString();
967    }
968
969    /**
970     * Convert a char to 4 hex uppercase digits.  E.g., hex('a') =>
971     * "0041".
972     */
973    public static String hex(long ch) {
974        return hex(ch, 4);
975    }
976
977    /**
978     * Supplies a zero-padded hex representation of an integer (without 0x)
979     */
980    static public String hex(long i, int places) {
981        if (i == Long.MIN_VALUE) return "-8000000000000000";
982        boolean negative = i < 0;
983        if (negative) {
984            i = -i;
985        }
986        String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH);
987        if (result.length() < places) {
988            result = "0000000000000000".substring(result.length(),places) + result;
989        }
990        if (negative) {
991            return '-' + result;
992        }
993        return result;
994    }
995
996    /**
997     * Convert a string to comma-separated groups of 4 hex uppercase
998     * digits.  E.g., hex('ab') => "0041,0042".
999     */
1000    public static String hex(CharSequence s) {
1001        return hex(s, 4, ",", true, new StringBuilder()).toString();
1002    }
1003
1004    /**
1005     * Convert a string to separated groups of hex uppercase
1006     * digits.  E.g., hex('ab'...) => "0041,0042".  Append the output
1007     * to the given Appendable.
1008     */
1009    public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
1010        try {
1011            if (useCodePoints) {
1012                int cp;
1013                for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
1014                    cp = Character.codePointAt(s, i);
1015                    if (i != 0) {
1016                        result.append(separator);
1017                    }
1018                    result.append(hex(cp,width));
1019                }
1020            } else {
1021                for (int i = 0; i < s.length(); ++i) {
1022                    if (i != 0) {
1023                        result.append(separator);
1024                    }
1025                    result.append(hex(s.charAt(i),width));
1026                }
1027            }
1028            return result;
1029        } catch (IOException e) {
1030            throw new IllegalIcuArgumentException(e);
1031        }
1032    }
1033
1034    public static String hex(byte[] o, int start, int end, String separator) {
1035        StringBuilder result = new StringBuilder();
1036        //int ch;
1037        for (int i = start; i < end; ++i) {
1038          if (i != 0) result.append(separator);
1039          result.append(hex(o[i]));
1040        }
1041        return result.toString();
1042      }
1043
1044    /**
1045     * Convert a string to comma-separated groups of 4 hex uppercase
1046     * digits.  E.g., hex('ab') => "0041,0042".
1047     */
1048    public static <S extends CharSequence> String hex(S s, int width, S separator) {
1049        return hex(s, width, separator, true, new StringBuilder()).toString();
1050    }
1051
1052    /**
1053     * Split a string into pieces based on the given divider character
1054     * @param s the string to split
1055     * @param divider the character on which to split.  Occurrences of
1056     * this character are not included in the output
1057     * @param output an array to receive the substrings between
1058     * instances of divider.  It must be large enough on entry to
1059     * accomodate all output.  Adjacent instances of the divider
1060     * character will place empty strings into output.  Before
1061     * returning, output is padded out with empty strings.
1062     */
1063    public static void split(String s, char divider, String[] output) {
1064        int last = 0;
1065        int current = 0;
1066        int i;
1067        for (i = 0; i < s.length(); ++i) {
1068            if (s.charAt(i) == divider) {
1069                output[current++] = s.substring(last,i);
1070                last = i+1;
1071            }
1072        }
1073        output[current++] = s.substring(last,i);
1074        while (current < output.length) {
1075            output[current++] = "";
1076        }
1077    }
1078
1079    /**
1080     * Split a string into pieces based on the given divider character
1081     * @param s the string to split
1082     * @param divider the character on which to split.  Occurrences of
1083     * this character are not included in the output
1084     * @return output an array to receive the substrings between
1085     * instances of divider. Adjacent instances of the divider
1086     * character will place empty strings into output.
1087     */
1088    public static String[] split(String s, char divider) {
1089        int last = 0;
1090        int i;
1091        ArrayList<String> output = new ArrayList<String>();
1092        for (i = 0; i < s.length(); ++i) {
1093            if (s.charAt(i) == divider) {
1094                output.add(s.substring(last,i));
1095                last = i+1;
1096            }
1097        }
1098        output.add( s.substring(last,i));
1099        return output.toArray(new String[output.size()]);
1100    }
1101
1102    /**
1103     * Look up a given string in a string array.  Returns the index at
1104     * which the first occurrence of the string was found in the
1105     * array, or -1 if it was not found.
1106     * @param source the string to search for
1107     * @param target the array of zero or more strings in which to
1108     * look for source
1109     * @return the index of target at which source first occurs, or -1
1110     * if not found
1111     */
1112    public static int lookup(String source, String[] target) {
1113        for (int i = 0; i < target.length; ++i) {
1114            if (source.equals(target[i])) return i;
1115        }
1116        return -1;
1117    }
1118
1119    /**
1120     * Parse a single non-whitespace character 'ch', optionally
1121     * preceded by whitespace.
1122     * @param id the string to be parsed
1123     * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
1124     * offset of the first character to be parsed.  On output, pos[0]
1125     * is the index after the last parsed character.  If the parse
1126     * fails, pos[0] will be unchanged.
1127     * @param ch the non-whitespace character to be parsed.
1128     * @return true if 'ch' is seen preceded by zero or more
1129     * whitespace characters.
1130     */
1131    public static boolean parseChar(String id, int[] pos, char ch) {
1132        int start = pos[0];
1133        pos[0] = PatternProps.skipWhiteSpace(id, pos[0]);
1134        if (pos[0] == id.length() ||
1135                id.charAt(pos[0]) != ch) {
1136            pos[0] = start;
1137            return false;
1138        }
1139        ++pos[0];
1140        return true;
1141    }
1142
1143    /**
1144     * Parse a pattern string starting at offset pos.  Keywords are
1145     * matched case-insensitively.  Spaces may be skipped and may be
1146     * optional or required.  Integer values may be parsed, and if
1147     * they are, they will be returned in the given array.  If
1148     * successful, the offset of the next non-space character is
1149     * returned.  On failure, -1 is returned.
1150     * @param pattern must only contain lowercase characters, which
1151     * will match their uppercase equivalents as well.  A space
1152     * character matches one or more required spaces.  A '~' character
1153     * matches zero or more optional spaces.  A '#' character matches
1154     * an integer and stores it in parsedInts, which the caller must
1155     * ensure has enough capacity.
1156     * @param parsedInts array to receive parsed integers.  Caller
1157     * must ensure that parsedInts.length is >= the number of '#'
1158     * signs in 'pattern'.
1159     * @return the position after the last character parsed, or -1 if
1160     * the parse failed
1161     */
1162    @SuppressWarnings("fallthrough")
1163    public static int parsePattern(String rule, int pos, int limit,
1164            String pattern, int[] parsedInts) {
1165        // TODO Update this to handle surrogates
1166        int[] p = new int[1];
1167        int intCount = 0; // number of integers parsed
1168        for (int i=0; i<pattern.length(); ++i) {
1169            char cpat = pattern.charAt(i);
1170            char c;
1171            switch (cpat) {
1172            case ' ':
1173                if (pos >= limit) {
1174                    return -1;
1175                }
1176                c = rule.charAt(pos++);
1177                if (!PatternProps.isWhiteSpace(c)) {
1178                    return -1;
1179                }
1180                // FALL THROUGH to skipWhitespace
1181            case '~':
1182                pos = PatternProps.skipWhiteSpace(rule, pos);
1183                break;
1184            case '#':
1185                p[0] = pos;
1186                parsedInts[intCount++] = parseInteger(rule, p, limit);
1187                if (p[0] == pos) {
1188                    // Syntax error; failed to parse integer
1189                    return -1;
1190                }
1191                pos = p[0];
1192                break;
1193            default:
1194                if (pos >= limit) {
1195                    return -1;
1196                }
1197                c = (char) UCharacter.toLowerCase(rule.charAt(pos++));
1198                if (c != cpat) {
1199                    return -1;
1200                }
1201                break;
1202            }
1203        }
1204        return pos;
1205    }
1206
1207    /**
1208     * Parse a pattern string within the given Replaceable and a parsing
1209     * pattern.  Characters are matched literally and case-sensitively
1210     * except for the following special characters:
1211     *
1212     * ~  zero or more Pattern_White_Space chars
1213     *
1214     * If end of pattern is reached with all matches along the way,
1215     * pos is advanced to the first unparsed index and returned.
1216     * Otherwise -1 is returned.
1217     * @param pat pattern that controls parsing
1218     * @param text text to be parsed, starting at index
1219     * @param index offset to first character to parse
1220     * @param limit offset after last character to parse
1221     * @return index after last parsed character, or -1 on parse failure.
1222     */
1223    public static int parsePattern(String pat,
1224            Replaceable text,
1225            int index,
1226            int limit) {
1227        int ipat = 0;
1228
1229        // empty pattern matches immediately
1230        if (ipat == pat.length()) {
1231            return index;
1232        }
1233
1234        int cpat = Character.codePointAt(pat, ipat);
1235
1236        while (index < limit) {
1237            int c = text.char32At(index);
1238
1239            // parse \s*
1240            if (cpat == '~') {
1241                if (PatternProps.isWhiteSpace(c)) {
1242                    index += UTF16.getCharCount(c);
1243                    continue;
1244                } else {
1245                    if (++ipat == pat.length()) {
1246                        return index; // success; c unparsed
1247                    }
1248                    // fall thru; process c again with next cpat
1249                }
1250            }
1251
1252            // parse literal
1253            else if (c == cpat) {
1254                int n = UTF16.getCharCount(c);
1255                index += n;
1256                ipat += n;
1257                if (ipat == pat.length()) {
1258                    return index; // success; c parsed
1259                }
1260                // fall thru; get next cpat
1261            }
1262
1263            // match failure of literal
1264            else {
1265                return -1;
1266            }
1267
1268            cpat = UTF16.charAt(pat, ipat);
1269        }
1270
1271        return -1; // text ended before end of pat
1272    }
1273
1274    /**
1275     * Parse an integer at pos, either of the form \d+ or of the form
1276     * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
1277     * or octal format.
1278     * @param pos INPUT-OUTPUT parameter.  On input, the first
1279     * character to parse.  On output, the character after the last
1280     * parsed character.
1281     */
1282    public static int parseInteger(String rule, int[] pos, int limit) {
1283        int count = 0;
1284        int value = 0;
1285        int p = pos[0];
1286        int radix = 10;
1287
1288        if (rule.regionMatches(true, p, "0x", 0, 2)) {
1289            p += 2;
1290            radix = 16;
1291        } else if (p < limit && rule.charAt(p) == '0') {
1292            p++;
1293            count = 1;
1294            radix = 8;
1295        }
1296
1297        while (p < limit) {
1298            int d = UCharacter.digit(rule.charAt(p++), radix);
1299            if (d < 0) {
1300                --p;
1301                break;
1302            }
1303            ++count;
1304            int v = (value * radix) + d;
1305            if (v <= value) {
1306                // If there are too many input digits, at some point
1307                // the value will go negative, e.g., if we have seen
1308                // "0x8000000" already and there is another '0', when
1309                // we parse the next 0 the value will go negative.
1310                return 0;
1311            }
1312            value = v;
1313        }
1314        if (count > 0) {
1315            pos[0] = p;
1316        }
1317        return value;
1318    }
1319
1320    /**
1321     * Parse a Unicode identifier from the given string at the given
1322     * position.  Return the identifier, or null if there is no
1323     * identifier.
1324     * @param str the string to parse
1325     * @param pos INPUT-OUPUT parameter.  On INPUT, pos[0] is the
1326     * first character to examine.  It must be less than str.length(),
1327     * and it must not point to a whitespace character.  That is, must
1328     * have pos[0] < str.length().  On
1329     * OUTPUT, the position after the last parsed character.
1330     * @return the Unicode identifier, or null if there is no valid
1331     * identifier at pos[0].
1332     */
1333    public static String parseUnicodeIdentifier(String str, int[] pos) {
1334        // assert(pos[0] < str.length());
1335        StringBuilder buf = new StringBuilder();
1336        int p = pos[0];
1337        while (p < str.length()) {
1338            int ch = Character.codePointAt(str, p);
1339            if (buf.length() == 0) {
1340                if (UCharacter.isUnicodeIdentifierStart(ch)) {
1341                    buf.appendCodePoint(ch);
1342                } else {
1343                    return null;
1344                }
1345            } else {
1346                if (UCharacter.isUnicodeIdentifierPart(ch)) {
1347                    buf.appendCodePoint(ch);
1348                } else {
1349                    break;
1350                }
1351            }
1352            p += UTF16.getCharCount(ch);
1353        }
1354        pos[0] = p;
1355        return buf.toString();
1356    }
1357
1358    static final char DIGITS[] = {
1359        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
1360        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
1361        'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
1362        'U', 'V', 'W', 'X', 'Y', 'Z'
1363    };
1364
1365    /**
1366     * Append the digits of a positive integer to the given
1367     * <code>Appendable</code> in the given radix. This is
1368     * done recursively since it is easiest to generate the low-
1369     * order digit first, but it must be appended last.
1370     *
1371     * @param result is the <code>Appendable</code> to append to
1372     * @param n is the positive integer
1373     * @param radix is the radix, from 2 to 36 inclusive
1374     * @param minDigits is the minimum number of digits to append.
1375     */
1376    private static <T extends Appendable> void recursiveAppendNumber(T result, int n,
1377            int radix, int minDigits)
1378    {
1379        try {
1380            int digit = n % radix;
1381
1382            if (n >= radix || minDigits > 1) {
1383                recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
1384            }
1385            result.append(DIGITS[digit]);
1386        } catch (IOException e) {
1387            throw new IllegalIcuArgumentException(e);
1388        }
1389    }
1390
1391    /**
1392     * Append a number to the given Appendable in the given radix.
1393     * Standard digits '0'-'9' are used and letters 'A'-'Z' for
1394     * radices 11 through 36.
1395     * @param result the digits of the number are appended here
1396     * @param n the number to be converted to digits; may be negative.
1397     * If negative, a '-' is prepended to the digits.
1398     * @param radix a radix from 2 to 36 inclusive.
1399     * @param minDigits the minimum number of digits, not including
1400     * any '-', to produce.  Values less than 2 have no effect.  One
1401     * digit is always emitted regardless of this parameter.
1402     * @return a reference to result
1403     */
1404    public static <T extends Appendable> T appendNumber(T result, int n,
1405            int radix, int minDigits)
1406    {
1407        try {
1408            if (radix < 2 || radix > 36) {
1409                throw new IllegalArgumentException("Illegal radix " + radix);
1410            }
1411
1412
1413            int abs = n;
1414
1415            if (n < 0) {
1416                abs = -n;
1417                result.append("-");
1418            }
1419
1420            recursiveAppendNumber(result, abs, radix, minDigits);
1421
1422            return result;
1423        } catch (IOException e) {
1424            throw new IllegalIcuArgumentException(e);
1425        }
1426
1427    }
1428
1429    /**
1430     * Parse an unsigned 31-bit integer at the given offset.  Use
1431     * UCharacter.digit() to parse individual characters into digits.
1432     * @param text the text to be parsed
1433     * @param pos INPUT-OUTPUT parameter.  On entry, pos[0] is the
1434     * offset within text at which to start parsing; it should point
1435     * to a valid digit.  On exit, pos[0] is the offset after the last
1436     * parsed character.  If the parse failed, it will be unchanged on
1437     * exit.  Must be >= 0 on entry.
1438     * @param radix the radix in which to parse; must be >= 2 and <=
1439     * 36.
1440     * @return a non-negative parsed number, or -1 upon parse failure.
1441     * Parse fails if there are no digits, that is, if pos[0] does not
1442     * point to a valid digit on entry, or if the number to be parsed
1443     * does not fit into a 31-bit unsigned integer.
1444     */
1445    public static int parseNumber(String text, int[] pos, int radix) {
1446        // assert(pos[0] >= 0);
1447        // assert(radix >= 2);
1448        // assert(radix <= 36);
1449        int n = 0;
1450        int p = pos[0];
1451        while (p < text.length()) {
1452            int ch = Character.codePointAt(text, p);
1453            int d = UCharacter.digit(ch, radix);
1454            if (d < 0) {
1455                break;
1456            }
1457            n = radix*n + d;
1458            // ASSUME that when a 32-bit integer overflows it becomes
1459            // negative.  E.g., 214748364 * 10 + 8 => negative value.
1460            if (n < 0) {
1461                return -1;
1462            }
1463            ++p;
1464        }
1465        if (p == pos[0]) {
1466            return -1;
1467        }
1468        pos[0] = p;
1469        return n;
1470    }
1471
1472    /**
1473     * Return true if the character is NOT printable ASCII.  The tab,
1474     * newline and linefeed characters are considered unprintable.
1475     */
1476    public static boolean isUnprintable(int c) {
1477        //0x20 = 32 and 0x7E = 126
1478        return !(c >= 0x20 && c <= 0x7E);
1479    }
1480
1481    /**
1482     * Escape unprintable characters using <backslash>uxxxx notation
1483     * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
1484     * above.  If the character is printable ASCII, then do nothing
1485     * and return FALSE.  Otherwise, append the escaped notation and
1486     * return TRUE.
1487     */
1488    public static <T extends Appendable> boolean escapeUnprintable(T result, int c) {
1489        try {
1490            if (isUnprintable(c)) {
1491                result.append('\\');
1492                if ((c & ~0xFFFF) != 0) {
1493                    result.append('U');
1494                    result.append(DIGITS[0xF&(c>>28)]);
1495                    result.append(DIGITS[0xF&(c>>24)]);
1496                    result.append(DIGITS[0xF&(c>>20)]);
1497                    result.append(DIGITS[0xF&(c>>16)]);
1498                } else {
1499                    result.append('u');
1500                }
1501                result.append(DIGITS[0xF&(c>>12)]);
1502                result.append(DIGITS[0xF&(c>>8)]);
1503                result.append(DIGITS[0xF&(c>>4)]);
1504                result.append(DIGITS[0xF&c]);
1505                return true;
1506            }
1507            return false;
1508        } catch (IOException e) {
1509            throw new IllegalIcuArgumentException(e);
1510        }
1511    }
1512
1513    /**
1514     * Returns the index of the first character in a set, ignoring quoted text.
1515     * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
1516     * found by a search for "h".  Unlike String.indexOf(), this method searches
1517     * not for a single character, but for any character of the string
1518     * <code>setOfChars</code>.
1519     * @param text text to be searched
1520     * @param start the beginning index, inclusive; <code>0 <= start
1521     * <= limit</code>.
1522     * @param limit the ending index, exclusive; <code>start <= limit
1523     * <= text.length()</code>.
1524     * @param setOfChars string with one or more distinct characters
1525     * @return Offset of the first character in <code>setOfChars</code>
1526     * found, or -1 if not found.
1527     * @see String#indexOf
1528     */
1529    public static int quotedIndexOf(String text, int start, int limit,
1530            String setOfChars) {
1531        for (int i=start; i<limit; ++i) {
1532            char c = text.charAt(i);
1533            if (c == BACKSLASH) {
1534                ++i;
1535            } else if (c == APOSTROPHE) {
1536                while (++i < limit
1537                        && text.charAt(i) != APOSTROPHE) {}
1538            } else if (setOfChars.indexOf(c) >= 0) {
1539                return i;
1540            }
1541        }
1542        return -1;
1543    }
1544
1545    /**
1546     * Append a character to a rule that is being built up.  To flush
1547     * the quoteBuf to rule, make one final call with isLiteral == true.
1548     * If there is no final character, pass in (int)-1 as c.
1549     * @param rule the string to append the character to
1550     * @param c the character to append, or (int)-1 if none.
1551     * @param isLiteral if true, then the given character should not be
1552     * quoted or escaped.  Usually this means it is a syntactic element
1553     * such as > or $
1554     * @param escapeUnprintable if true, then unprintable characters
1555     * should be escaped using escapeUnprintable().  These escapes will
1556     * appear outside of quotes.
1557     * @param quoteBuf a buffer which is used to build up quoted
1558     * substrings.  The caller should initially supply an empty buffer,
1559     * and thereafter should not modify the buffer.  The buffer should be
1560     * cleared out by, at the end, calling this method with a literal
1561     * character (which may be -1).
1562     */
1563    public static void appendToRule(StringBuffer rule,
1564            int c,
1565            boolean isLiteral,
1566            boolean escapeUnprintable,
1567            StringBuffer quoteBuf) {
1568        // If we are escaping unprintables, then escape them outside
1569        // quotes.  \\u and \\U are not recognized within quotes.  The same
1570        // logic applies to literals, but literals are never escaped.
1571        if (isLiteral ||
1572                (escapeUnprintable && Utility.isUnprintable(c))) {
1573            if (quoteBuf.length() > 0) {
1574                // We prefer backslash APOSTROPHE to double APOSTROPHE
1575                // (more readable, less similar to ") so if there are
1576                // double APOSTROPHEs at the ends, we pull them outside
1577                // of the quote.
1578
1579                // If the first thing in the quoteBuf is APOSTROPHE
1580                // (doubled) then pull it out.
1581                while (quoteBuf.length() >= 2 &&
1582                        quoteBuf.charAt(0) == APOSTROPHE &&
1583                        quoteBuf.charAt(1) == APOSTROPHE) {
1584                    rule.append(BACKSLASH).append(APOSTROPHE);
1585                    quoteBuf.delete(0, 2);
1586                }
1587                // If the last thing in the quoteBuf is APOSTROPHE
1588                // (doubled) then remove and count it and add it after.
1589                int trailingCount = 0;
1590                while (quoteBuf.length() >= 2 &&
1591                        quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE &&
1592                        quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) {
1593                    quoteBuf.setLength(quoteBuf.length()-2);
1594                    ++trailingCount;
1595                }
1596                if (quoteBuf.length() > 0) {
1597                    rule.append(APOSTROPHE);
1598                    rule.append(quoteBuf);
1599                    rule.append(APOSTROPHE);
1600                    quoteBuf.setLength(0);
1601                }
1602                while (trailingCount-- > 0) {
1603                    rule.append(BACKSLASH).append(APOSTROPHE);
1604                }
1605            }
1606            if (c != -1) {
1607                /* Since spaces are ignored during parsing, they are
1608                 * emitted only for readability.  We emit one here
1609                 * only if there isn't already one at the end of the
1610                 * rule.
1611                 */
1612                if (c == ' ') {
1613                    int len = rule.length();
1614                    if (len > 0 && rule.charAt(len-1) != ' ') {
1615                        rule.append(' ');
1616                    }
1617                } else if (!escapeUnprintable || !Utility.escapeUnprintable(rule, c)) {
1618                    rule.appendCodePoint(c);
1619                }
1620            }
1621        }
1622
1623        // Escape ' and '\' and don't begin a quote just for them
1624        else if (quoteBuf.length() == 0 &&
1625                (c == APOSTROPHE || c == BACKSLASH)) {
1626            rule.append(BACKSLASH).append((char)c);
1627        }
1628
1629        // Specials (printable ascii that isn't [0-9a-zA-Z]) and
1630        // whitespace need quoting.  Also append stuff to quotes if we are
1631        // building up a quoted substring already.
1632        else if (quoteBuf.length() > 0 ||
1633                (c >= 0x0021 && c <= 0x007E &&
1634                        !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
1635                                (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
1636                                (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) ||
1637                                PatternProps.isWhiteSpace(c)) {
1638            quoteBuf.appendCodePoint(c);
1639            // Double ' within a quote
1640            if (c == APOSTROPHE) {
1641                quoteBuf.append((char)c);
1642            }
1643        }
1644
1645        // Otherwise just append
1646        else {
1647            rule.appendCodePoint(c);
1648        }
1649    }
1650
1651    /**
1652     * Append the given string to the rule.  Calls the single-character
1653     * version of appendToRule for each character.
1654     */
1655    public static void appendToRule(StringBuffer rule,
1656            String text,
1657            boolean isLiteral,
1658            boolean escapeUnprintable,
1659            StringBuffer quoteBuf) {
1660        for (int i=0; i<text.length(); ++i) {
1661            // Okay to process in 16-bit code units here
1662            appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
1663        }
1664    }
1665
1666    /**
1667     * Given a matcher reference, which may be null, append its
1668     * pattern as a literal to the given rule.
1669     */
1670    public static void appendToRule(StringBuffer rule,
1671            UnicodeMatcher matcher,
1672            boolean escapeUnprintable,
1673            StringBuffer quoteBuf) {
1674        if (matcher != null) {
1675            appendToRule(rule, matcher.toPattern(escapeUnprintable),
1676                    true, escapeUnprintable, quoteBuf);
1677        }
1678    }
1679
1680    /**
1681     * Compares 2 unsigned integers
1682     * @param source 32 bit unsigned integer
1683     * @param target 32 bit unsigned integer
1684     * @return 0 if equals, 1 if source is greater than target and -1
1685     *         otherwise
1686     */
1687    public static final int compareUnsigned(int source, int target)
1688    {
1689        source += MAGIC_UNSIGNED;
1690        target += MAGIC_UNSIGNED;
1691        if (source < target) {
1692            return -1;
1693        }
1694        else if (source > target) {
1695            return 1;
1696        }
1697        return 0;
1698    }
1699
1700    /**
1701     * Find the highest bit in a positive integer. This is done
1702     * by doing a binary search through the bits.
1703     *
1704     * @param n is the integer
1705     *
1706     * @return the bit number of the highest bit, with 0 being
1707     * the low order bit, or -1 if <code>n</code> is not positive
1708     */
1709    public static final byte highBit(int n)
1710    {
1711        if (n <= 0) {
1712            return -1;
1713        }
1714
1715        byte bit = 0;
1716
1717        if (n >= 1 << 16) {
1718            n >>= 16;
1719        bit += 16;
1720        }
1721
1722        if (n >= 1 << 8) {
1723            n >>= 8;
1724        bit += 8;
1725        }
1726
1727        if (n >= 1 << 4) {
1728            n >>= 4;
1729        bit += 4;
1730        }
1731
1732        if (n >= 1 << 2) {
1733            n >>= 2;
1734        bit += 2;
1735        }
1736
1737        if (n >= 1 << 1) {
1738            n >>= 1;
1739        bit += 1;
1740        }
1741
1742        return bit;
1743    }
1744    /**
1745     * Utility method to take a int[] containing codepoints and return
1746     * a string representation with code units.
1747     */
1748    public static String valueOf(int[]source){
1749        // TODO: Investigate why this method is not on UTF16 class
1750        StringBuilder result = new StringBuilder(source.length);
1751        for(int i=0; i<source.length; i++){
1752            result.appendCodePoint(source[i]);
1753        }
1754        return result.toString();
1755    }
1756
1757
1758    /**
1759     * Utility to duplicate a string count times
1760     * @param s String to be duplicated.
1761     * @param count Number of times to duplicate a string.
1762     */
1763    public static String repeat(String s, int count) {
1764        if (count <= 0) return "";
1765        if (count == 1) return s;
1766        StringBuilder result = new StringBuilder();
1767        for (int i = 0; i < count; ++i) {
1768            result.append(s);
1769        }
1770        return result.toString();
1771    }
1772
1773    public static String[] splitString(String src, String target) {
1774        return src.split("\\Q" + target + "\\E");
1775    }
1776
1777    /**
1778     * Split the string at runs of ascii whitespace characters.
1779     */
1780    public static String[] splitWhitespace(String src) {
1781        return src.split("\\s+");
1782    }
1783
1784    /**
1785     * Parse a list of hex numbers and return a string
1786     * @param string String of hex numbers.
1787     * @param minLength Minimal length.
1788     * @param separator Separator.
1789     * @return A string from hex numbers.
1790     */
1791    public static String fromHex(String string, int minLength, String separator) {
1792        return fromHex(string, minLength, Pattern.compile(separator != null ? separator : "\\s+"));
1793    }
1794
1795    /**
1796     * Parse a list of hex numbers and return a string
1797     * @param string String of hex numbers.
1798     * @param minLength Minimal length.
1799     * @param separator Separator.
1800     * @return A string from hex numbers.
1801     */
1802    public static String fromHex(String string, int minLength, Pattern separator) {
1803        StringBuilder buffer = new StringBuilder();
1804        String[] parts = separator.split(string);
1805        for (String part : parts) {
1806            if (part.length() < minLength) {
1807                throw new IllegalArgumentException("code point too short: " + part);
1808            }
1809            int cp = Integer.parseInt(part, 16);
1810            buffer.appendCodePoint(cp);
1811        }
1812        return buffer.toString();
1813    }
1814}
1815