1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/**
5 *******************************************************************************
6 * Copyright (C) 2000-2010, International Business Machines Corporation and    *
7 * others. All Rights Reserved.                                                *
8 *******************************************************************************
9 */
10package android.icu.dev.test.translit;
11
12import android.icu.text.UTF16;
13import android.icu.text.UnicodeSet;
14
15public final class TestUtility {
16
17    public static String hex(char ch) {
18        String foo = Integer.toString(ch,16).toUpperCase();
19        return "0000".substring(0,4-foo.length()) + foo;
20    }
21
22    public static String hex(int ch) {
23        String foo = Integer.toString(ch,16).toUpperCase();
24        return "00000000".substring(0,4-foo.length()) + foo;
25    }
26
27    public static String hex(String s) {
28      return hex(s,",");
29    }
30
31    public static String hex(String s, String sep) {
32      if (s.length() == 0) return "";
33      String result = hex(s.charAt(0));
34      for (int i = 1; i < s.length(); ++i) {
35        result += sep;
36        result += hex(s.charAt(i));
37      }
38      return result;
39    }
40
41    public static String replace(String source, String toBeReplaced, String replacement) {
42        StringBuffer results = new StringBuffer();
43        int len = toBeReplaced.length();
44        for (int i = 0; i < source.length(); ++i) {
45            if (source.regionMatches(false, i, toBeReplaced, 0, len)) {
46                results.append(replacement);
47                i += len - 1; // minus one, since we will increment
48            } else {
49                results.append(source.charAt(i));
50            }
51        }
52        return results.toString();
53    }
54
55    public static String replaceAll(String source, UnicodeSet set, String replacement) {
56        StringBuffer results = new StringBuffer();
57        int cp;
58        for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
59            cp = UTF16.charAt(source,i);
60            if (set.contains(cp)) {
61                results.append(replacement);
62            } else {
63                UTF16.append(results, cp);
64            }
65        }
66        return results.toString();
67    }
68
69    // COMMENTED OUT ALL THE OLD SCRIPT STUFF
70    /*
71    public static byte getScript(char c) {
72      return getScript(getBlock(c));
73    }
74
75    public static byte getScript(byte block) {
76      return blockToScript[block];
77    }
78
79    public static byte getBlock(char c) {
80      int index = c >> 7;
81      byte block = charToBlock[index];
82      while (block < 0) { // take care of exceptions, blocks split across 128 boundaries
83          int[] tuple = split[-block-1];
84          if (c < tuple[0]) block = (byte)tuple[1];
85          else block = (byte)tuple[2];
86      }
87      return block;
88    }
89
90    // returns next letter of script, or 0xFFFF if done
91
92    public static char getNextLetter(char c, byte script) {
93        while (c < 0xFFFF) {
94            ++c;
95            if (getScript(c) == script && Character.isLetter(c)) {
96                return c;
97            }
98        }
99        return c;
100    }
101
102    // Supplements to Character methods; these methods go through
103    // UCharacter if possible.  If not, they fall back to Character.
104
105    public static boolean isUnassigned(char c) {
106        try {
107            return UCharacter.getType(c) == UCharacterCategory.UNASSIGNED;
108        } catch (NullPointerException e) {
109            System.out.print("");
110        }
111        return Character.getType(c) == Character.UNASSIGNED;
112    }
113
114    public static boolean isLetter(char c) {
115        try {
116            return UCharacter.isLetter(c);
117        } catch (NullPointerException e) {
118            System.out.print("");
119        }
120        return Character.isLetter(c);
121    }
122
123  public static void main(String[] args) {
124    System.out.println("Blocks: ");
125    byte lastblock = -128;
126    for (char cc = 0; cc < 0xFFFF; ++cc) {
127      byte block = TestUtility.getBlock(cc);
128      if (block != lastblock) {
129        System.out.println(TestUtility.hex(cc) + "\t" + block);
130        lastblock = block;
131      }
132    }
133    System.out.println();
134    System.out.println("Scripts: ");
135    byte lastScript = -128;
136    for (char cc = 0; cc < 0xFFFF; ++cc) {
137      byte script = TestUtility.getScript(cc);
138      if (script != lastScript) {
139        System.out.println(TestUtility.hex(cc) + "\t" + script);
140        lastScript = script;
141      }
142    }
143  }
144
145
146
147    public static final byte // SCRIPT CODE
148        COMMON_SCRIPT = 0,
149        LATIN_SCRIPT = 1,
150        GREEK_SCRIPT = 2,
151        CYRILLIC_SCRIPT = 3,
152        ARMENIAN_SCRIPT = 4,
153        HEBREW_SCRIPT = 5,
154        ARABIC_SCRIPT = 6,
155        SYRIAC_SCRIPT = 7,
156        THAANA_SCRIPT = 8,
157        DEVANAGARI_SCRIPT = 9,
158        BENGALI_SCRIPT = 10,
159        GURMUKHI_SCRIPT = 11,
160        GUJARATI_SCRIPT = 12,
161        ORIYA_SCRIPT = 13,
162        TAMIL_SCRIPT = 14,
163        TELUGU_SCRIPT = 15,
164        KANNADA_SCRIPT = 16,
165        MALAYALAM_SCRIPT = 17,
166        SINHALA_SCRIPT = 18,
167        THAI_SCRIPT = 19,
168        LAO_SCRIPT = 20,
169        TIBETAN_SCRIPT = 21,
170        MYANMAR_SCRIPT = 22,
171        GEORGIAN_SCRIPT = 23,
172        JAMO_SCRIPT = 24,
173        HANGUL_SCRIPT = 25,
174        ETHIOPIC_SCRIPT = 26,
175        CHEROKEE_SCRIPT = 27,
176        ABORIGINAL_SCRIPT = 28,
177        OGHAM_SCRIPT = 29,
178        RUNIC_SCRIPT = 30,
179        KHMER_SCRIPT = 31,
180        MONGOLIAN_SCRIPT = 32,
181        HIRAGANA_SCRIPT = 33,
182        KATAKANA_SCRIPT = 34,
183        BOPOMOFO_SCRIPT = 35,
184        HAN_SCRIPT = 36,
185        YI_SCRIPT = 37;
186
187    public static final byte // block code
188        RESERVED_BLOCK = 0,
189        BASIC_LATIN = 1,
190        LATIN_1_SUPPLEMENT = 2,
191        LATIN_EXTENDED_A = 3,
192        LATIN_EXTENDED_B = 4,
193        IPA_EXTENSIONS = 5,
194        SPACING_MODIFIER_LETTERS = 6,
195        COMBINING_DIACRITICAL_MARKS = 7,
196        GREEK = 8,
197        CYRILLIC = 9,
198        ARMENIAN = 10,
199        HEBREW = 11,
200        ARABIC = 12,
201        SYRIAC = 13,
202        THAANA = 14,
203        DEVANAGARI = 15,
204        BENGALI = 16,
205        GURMUKHI = 17,
206        GUJARATI = 18,
207        ORIYA = 19,
208        TAMIL = 20,
209        TELUGU = 21,
210        KANNADA = 22,
211        MALAYALAM = 23,
212        SINHALA = 24,
213        THAI = 25,
214        LAO = 26,
215        TIBETAN = 27,
216        MYANMAR = 28,
217        GEORGIAN = 29,
218        HANGUL_JAMO = 30,
219        ETHIOPIC = 31,
220        CHEROKEE = 32,
221        UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33,
222        OGHAM = 34,
223        RUNIC = 35,
224        KHMER = 36,
225        MONGOLIAN = 37,
226        LATIN_EXTENDED_ADDITIONAL = 38,
227        GREEK_EXTENDED = 39,
228        GENERAL_PUNCTUATION = 40,
229        SUPERSCRIPTS_AND_SUBSCRIPTS = 41,
230        CURRENCY_SYMBOLS = 42,
231        COMBINING_MARKS_FOR_SYMBOLS = 43,
232        LETTERLIKE_SYMBOLS = 44,
233        NUMBER_FORMS = 45,
234        ARROWS = 46,
235        MATHEMATICAL_OPERATORS = 47,
236        MISCELLANEOUS_TECHNICAL = 48,
237        CONTROL_PICTURES = 49,
238        OPTICAL_CHARACTER_RECOGNITION = 50,
239        ENCLOSED_ALPHANUMERICS = 51,
240        BOX_DRAWING = 52,
241        BLOCK_ELEMENTS = 53,
242        GEOMETRIC_SHAPES = 54,
243        MISCELLANEOUS_SYMBOLS = 55,
244        DINGBATS = 56,
245        BRAILLE_PATTERNS = 57,
246        CJK_RADICALS_SUPPLEMENT = 58,
247        KANGXI_RADICALS = 59,
248        IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60,
249        CJK_SYMBOLS_AND_PUNCTUATION = 61,
250        HIRAGANA = 62,
251        KATAKANA = 63,
252        BOPOMOFO = 64,
253        HANGUL_COMPATIBILITY_JAMO = 65,
254        KANBUN = 66,
255        BOPOMOFO_EXTENDED = 67,
256        ENCLOSED_CJK_LETTERS_AND_MONTHS = 68,
257        CJK_COMPATIBILITY = 69,
258        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70,
259        CJK_UNIFIED_IDEOGRAPHS = 71,
260        YI_SYLLABLES = 72,
261        YI_RADICALS = 73,
262        HANGUL_SYLLABLES = 74,
263        HIGH_SURROGATES = 75,
264        HIGH_PRIVATE_USE_SURROGATES = 76,
265        LOW_SURROGATES = 77,
266        PRIVATE_USE = 78,
267        CJK_COMPATIBILITY_IDEOGRAPHS = 79,
268        ALPHABETIC_PRESENTATION_FORMS = 80,
269        ARABIC_PRESENTATION_FORMS_A = 81,
270        COMBINING_HALF_MARKS = 82,
271        CJK_COMPATIBILITY_FORMS = 83,
272        SMALL_FORM_VARIANTS = 84,
273        ARABIC_PRESENTATION_FORMS_B = 85,
274        SPECIALS = 86,
275        HALFWIDTH_AND_FULLWIDTH_FORMS = 87;
276
277    static final byte[] blockToScript = {
278        COMMON_SCRIPT, // 0, <RESERVED_BLOCK>
279        LATIN_SCRIPT, // 1, BASIC_LATIN
280        LATIN_SCRIPT, // 2, LATIN_1_SUPPLEMENT
281        LATIN_SCRIPT, // 3, LATIN_EXTENDED_A
282        LATIN_SCRIPT, // 4, LATIN_EXTENDED_B
283        LATIN_SCRIPT, // 5, IPA_EXTENSIONS
284        COMMON_SCRIPT, // 6, SPACING_MODIFIER_LETTERS
285        COMMON_SCRIPT, // 7, COMBINING_DIACRITICAL_MARKS
286        GREEK_SCRIPT, // 8, GREEK
287        CYRILLIC_SCRIPT, // 9, CYRILLIC
288        ARMENIAN_SCRIPT, // 10, ARMENIAN
289        HEBREW_SCRIPT, // 11, HEBREW
290        ARABIC_SCRIPT, // 12, ARABIC
291        SYRIAC_SCRIPT, // 13, SYRIAC
292        THAANA_SCRIPT, // 14, THAANA
293        DEVANAGARI_SCRIPT, // 15, DEVANAGARI
294        BENGALI_SCRIPT, // 16, BENGALI
295        GURMUKHI_SCRIPT, // 17, GURMUKHI
296        GUJARATI_SCRIPT, // 18, GUJARATI
297        ORIYA_SCRIPT, // 19, ORIYA
298        TAMIL_SCRIPT, // 20, TAMIL
299        TELUGU_SCRIPT, // 21, TELUGU
300        KANNADA_SCRIPT, // 22, KANNADA
301        MALAYALAM_SCRIPT, // 23, MALAYALAM
302        SINHALA_SCRIPT, // 24, SINHALA
303        THAI_SCRIPT, // 25, THAI
304        LAO_SCRIPT, // 26, LAO
305        TIBETAN_SCRIPT, // 27, TIBETAN
306        MYANMAR_SCRIPT, // 28, MYANMAR
307        GEORGIAN_SCRIPT, // 29, GEORGIAN
308        JAMO_SCRIPT, // 30, HANGUL_JAMO
309        ETHIOPIC_SCRIPT, // 31, ETHIOPIC
310        CHEROKEE_SCRIPT, // 32, CHEROKEE
311        ABORIGINAL_SCRIPT, // 33, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
312        OGHAM_SCRIPT, // 34, OGHAM
313        RUNIC_SCRIPT, // 35, RUNIC
314        KHMER_SCRIPT, // 36, KHMER
315        MONGOLIAN_SCRIPT, // 37, MONGOLIAN
316        LATIN_SCRIPT, // 38, LATIN_EXTENDED_ADDITIONAL
317        GREEK_SCRIPT, // 39, GREEK_EXTENDED
318        COMMON_SCRIPT, // 40, GENERAL_PUNCTUATION
319        COMMON_SCRIPT, // 41, SUPERSCRIPTS_AND_SUBSCRIPTS
320        COMMON_SCRIPT, // 42, CURRENCY_SYMBOLS
321        COMMON_SCRIPT, // 43, COMBINING_MARKS_FOR_SYMBOLS
322        COMMON_SCRIPT, // 44, LETTERLIKE_SYMBOLS
323        COMMON_SCRIPT, // 45, NUMBER_FORMS
324        COMMON_SCRIPT, // 46, ARROWS
325        COMMON_SCRIPT, // 47, MATHEMATICAL_OPERATORS
326        COMMON_SCRIPT, // 48, MISCELLANEOUS_TECHNICAL
327        COMMON_SCRIPT, // 49, CONTROL_PICTURES
328        COMMON_SCRIPT, // 50, OPTICAL_CHARACTER_RECOGNITION
329        COMMON_SCRIPT, // 51, ENCLOSED_ALPHANUMERICS
330        COMMON_SCRIPT, // 52, BOX_DRAWING
331        COMMON_SCRIPT, // 53, BLOCK_ELEMENTS
332        COMMON_SCRIPT, // 54, GEOMETRIC_SHAPES
333        COMMON_SCRIPT, // 55, MISCELLANEOUS_SYMBOLS
334        COMMON_SCRIPT, // 56, DINGBATS
335        COMMON_SCRIPT, // 57, BRAILLE_PATTERNS
336        HAN_SCRIPT, // 58, CJK_RADICALS_SUPPLEMENT
337        HAN_SCRIPT, // 59, KANGXI_RADICALS
338        HAN_SCRIPT, // 60, IDEOGRAPHIC_DESCRIPTION_CHARACTERS
339        COMMON_SCRIPT, // 61, CJK_SYMBOLS_AND_PUNCTUATION
340        HIRAGANA_SCRIPT, // 62, HIRAGANA
341        KATAKANA_SCRIPT, // 63, KATAKANA
342        BOPOMOFO_SCRIPT, // 64, BOPOMOFO
343        JAMO_SCRIPT, // 65, HANGUL_COMPATIBILITY_JAMO
344        HAN_SCRIPT, // 66, KANBUN
345        BOPOMOFO_SCRIPT, // 67, BOPOMOFO_EXTENDED
346        COMMON_SCRIPT, // 68, ENCLOSED_CJK_LETTERS_AND_MONTHS
347        COMMON_SCRIPT, // 69, CJK_COMPATIBILITY
348        HAN_SCRIPT, // 70, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
349        HAN_SCRIPT, // 71, CJK_UNIFIED_IDEOGRAPHS
350        YI_SCRIPT, // 72, YI_SYLLABLES
351        YI_SCRIPT, // 73, YI_RADICALS
352        HANGUL_SCRIPT, // 74, HANGUL_SYLLABLES
353        COMMON_SCRIPT, // 75, HIGH_SURROGATES
354        COMMON_SCRIPT, // 76, HIGH_PRIVATE_USE_SURROGATES
355        COMMON_SCRIPT, // 77, LOW_SURROGATES
356        COMMON_SCRIPT, // 78, PRIVATE_USE
357        HAN_SCRIPT, // 79, CJK_COMPATIBILITY_IDEOGRAPHS
358        COMMON_SCRIPT, // 80, ALPHABETIC_PRESENTATION_FORMS
359        ARABIC_SCRIPT, // 81, ARABIC_PRESENTATION_FORMS_A
360        COMMON_SCRIPT, // 82, COMBINING_HALF_MARKS
361        COMMON_SCRIPT, // 83, CJK_COMPATIBILITY_FORMS
362        COMMON_SCRIPT, // 84, SMALL_FORM_VARIANTS
363        ARABIC_SCRIPT, // 85, ARABIC_PRESENTATION_FORMS_B
364        COMMON_SCRIPT, // 86, SPECIALS
365        COMMON_SCRIPT, // 87, HALFWIDTH_AND_FULLWIDTH_FORMS
366        COMMON_SCRIPT, // 88, SPECIALS
367    };
368
369    // could be further reduced to a byte array, but I didn't bother.
370    static final int[][] split = {
371        {0x0250, 4, 5}, // -1
372        {0x02B0, 5, 6}, // -2
373        {0x0370, 7, 8}, // -3
374        {0x0530, 0, 10}, // -4
375        {0x0590, 10, 11}, // -5
376        {0x0750, 13, 0}, // -6
377        {0x07C0, 14, 0}, // -7
378        {0x10A0, 28, 29}, // -8
379        {0x13A0, 0, 32}, // -9
380        {0x16A0, 34, 35}, // -10
381        {0x18B0, 37, 0}, // -11
382        {0x2070, 40, 41}, // -12
383        {0x20A0, 41, -31}, // -13
384        {0x2150, 44, 45}, // -14
385        {0x2190, 45, 46}, // -15
386        {0x2440, 49, -32}, // -16
387        {0x25A0, 53, 54}, // -17
388        {0x27C0, 56, 0}, // -18
389        {0x2FE0, 59, -33}, // -19
390        {0x3040, 61, 62}, // -20
391        {0x30A0, 62, 63}, // -21
392        {0x3130, 64, 65}, // -22
393        {0x3190, 65, -34}, // -23
394        {0x4DB6, 70, 0}, // -24
395        {0xA490, 72, -35}, // -25
396        {0xD7A4, 74, 0}, // -26
397        {0xFB50, 80, 81}, // -27
398        {0xFE20, 0, -36}, // -28
399        {0xFEFF, 85, 86}, // -29
400        {0xFFF0, 87, -37}, // -30
401        {0x20D0, 42, 43}, // -31
402        {0x2460, 50, 51}, // -32
403        {0x2FF0, 0, 60}, // -33
404        {0x31A0, 66, -38}, // -34
405        {0xA4D0, 73, 0}, //-35
406        {0xFE30, 82, -39}, //-36
407        {0xFFFE, 88, 0}, //-37
408        {0x31C0, 67, 0}, // -38
409        {0xFE50, 83, -40}, //-39
410        {0xFE70, 84, 85} // -40
411    };
412
413    static final byte[] charToBlock = {
414      1, 2, 3, 4, -1, -2, -3, 8, 9, 9, -4, -5, 12, 12, -6, -7,
415      0, 0, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 27,
416      28, -8, 30, 30, 31, 31, 31, -9, 33, 33, 33, 33, 33, -10, 0, 36,
417      37, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 38, 39, 39,
418      -12, -13, -14, -15, 47, 47, 48, 48, -16, 51, 52, -17, 55, 55, 56, -18,
419      57, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 59, -19,
420      -20, -21, -22, -23, 68, 68, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70,
421      70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
422      70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
423      70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, -24, 71, 71, 71, 71,
424      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
425      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
426      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
427      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
428      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
429      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
430      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
431      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
432      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
433      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
434      72, 72, 72, 72, 72, 72, 72, 72, 72, -25, 0, 0, 0, 0, 0, 0,
435      0, 0, 0, 0, 0, 0, 0, 0, 74, 74, 74, 74, 74, 74, 74, 74,
436      74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
437      74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
438      74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
439      74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
440      74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, -26,
441      75, 75, 75, 75, 75, 75, 75, 76, 77, 77, 77, 77, 77, 77, 77, 77,
442      78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
443      78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
444      78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
445      78, 78, 79, 79, 79, 79, -27, 81, 81, 81, 81, 81, -28, -29, 87, -30
446    };
447    */
448}
449