1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/**
5 *******************************************************************************
6 * Copyright (C) 2000-2010, International Business Machines Corporation and    *
7 * others. All Rights Reserved.                                                *
8 *******************************************************************************
9 */
10package android.icu.dev.test.translit;
11
12import android.icu.text.UTF16;
13import android.icu.text.UnicodeSet;
14import android.icu.testsharding.MainTestShard;
15
16@MainTestShard
17public final class TestUtility {
18
19    public static String hex(char ch) {
20        String foo = Integer.toString(ch,16).toUpperCase();
21        return "0000".substring(0,4-foo.length()) + foo;
22    }
23
24    public static String hex(int ch) {
25        String foo = Integer.toString(ch,16).toUpperCase();
26        return "00000000".substring(0,4-foo.length()) + foo;
27    }
28
29    public static String hex(String s) {
30      return hex(s,",");
31    }
32
33    public static String hex(String s, String sep) {
34      if (s.length() == 0) return "";
35      String result = hex(s.charAt(0));
36      for (int i = 1; i < s.length(); ++i) {
37        result += sep;
38        result += hex(s.charAt(i));
39      }
40      return result;
41    }
42
43    public static String replace(String source, String toBeReplaced, String replacement) {
44        StringBuffer results = new StringBuffer();
45        int len = toBeReplaced.length();
46        for (int i = 0; i < source.length(); ++i) {
47            if (source.regionMatches(false, i, toBeReplaced, 0, len)) {
48                results.append(replacement);
49                i += len - 1; // minus one, since we will increment
50            } else {
51                results.append(source.charAt(i));
52            }
53        }
54        return results.toString();
55    }
56
57    public static String replaceAll(String source, UnicodeSet set, String replacement) {
58        StringBuffer results = new StringBuffer();
59        int cp;
60        for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
61            cp = UTF16.charAt(source,i);
62            if (set.contains(cp)) {
63                results.append(replacement);
64            } else {
65                UTF16.append(results, cp);
66            }
67        }
68        return results.toString();
69    }
70
71    // COMMENTED OUT ALL THE OLD SCRIPT STUFF
72    /*
73    public static byte getScript(char c) {
74      return getScript(getBlock(c));
75    }
76
77    public static byte getScript(byte block) {
78      return blockToScript[block];
79    }
80
81    public static byte getBlock(char c) {
82      int index = c >> 7;
83      byte block = charToBlock[index];
84      while (block < 0) { // take care of exceptions, blocks split across 128 boundaries
85          int[] tuple = split[-block-1];
86          if (c < tuple[0]) block = (byte)tuple[1];
87          else block = (byte)tuple[2];
88      }
89      return block;
90    }
91
92    // returns next letter of script, or 0xFFFF if done
93
94    public static char getNextLetter(char c, byte script) {
95        while (c < 0xFFFF) {
96            ++c;
97            if (getScript(c) == script && Character.isLetter(c)) {
98                return c;
99            }
100        }
101        return c;
102    }
103
104    // Supplements to Character methods; these methods go through
105    // UCharacter if possible.  If not, they fall back to Character.
106
107    public static boolean isUnassigned(char c) {
108        try {
109            return UCharacter.getType(c) == UCharacterCategory.UNASSIGNED;
110        } catch (NullPointerException e) {
111            System.out.print("");
112        }
113        return Character.getType(c) == Character.UNASSIGNED;
114    }
115
116    public static boolean isLetter(char c) {
117        try {
118            return UCharacter.isLetter(c);
119        } catch (NullPointerException e) {
120            System.out.print("");
121        }
122        return Character.isLetter(c);
123    }
124
125  public static void main(String[] args) {
126    System.out.println("Blocks: ");
127    byte lastblock = -128;
128    for (char cc = 0; cc < 0xFFFF; ++cc) {
129      byte block = TestUtility.getBlock(cc);
130      if (block != lastblock) {
131        System.out.println(TestUtility.hex(cc) + "\t" + block);
132        lastblock = block;
133      }
134    }
135    System.out.println();
136    System.out.println("Scripts: ");
137    byte lastScript = -128;
138    for (char cc = 0; cc < 0xFFFF; ++cc) {
139      byte script = TestUtility.getScript(cc);
140      if (script != lastScript) {
141        System.out.println(TestUtility.hex(cc) + "\t" + script);
142        lastScript = script;
143      }
144    }
145  }
146
147
148
149    public static final byte // SCRIPT CODE
150        COMMON_SCRIPT = 0,
151        LATIN_SCRIPT = 1,
152        GREEK_SCRIPT = 2,
153        CYRILLIC_SCRIPT = 3,
154        ARMENIAN_SCRIPT = 4,
155        HEBREW_SCRIPT = 5,
156        ARABIC_SCRIPT = 6,
157        SYRIAC_SCRIPT = 7,
158        THAANA_SCRIPT = 8,
159        DEVANAGARI_SCRIPT = 9,
160        BENGALI_SCRIPT = 10,
161        GURMUKHI_SCRIPT = 11,
162        GUJARATI_SCRIPT = 12,
163        ORIYA_SCRIPT = 13,
164        TAMIL_SCRIPT = 14,
165        TELUGU_SCRIPT = 15,
166        KANNADA_SCRIPT = 16,
167        MALAYALAM_SCRIPT = 17,
168        SINHALA_SCRIPT = 18,
169        THAI_SCRIPT = 19,
170        LAO_SCRIPT = 20,
171        TIBETAN_SCRIPT = 21,
172        MYANMAR_SCRIPT = 22,
173        GEORGIAN_SCRIPT = 23,
174        JAMO_SCRIPT = 24,
175        HANGUL_SCRIPT = 25,
176        ETHIOPIC_SCRIPT = 26,
177        CHEROKEE_SCRIPT = 27,
178        ABORIGINAL_SCRIPT = 28,
179        OGHAM_SCRIPT = 29,
180        RUNIC_SCRIPT = 30,
181        KHMER_SCRIPT = 31,
182        MONGOLIAN_SCRIPT = 32,
183        HIRAGANA_SCRIPT = 33,
184        KATAKANA_SCRIPT = 34,
185        BOPOMOFO_SCRIPT = 35,
186        HAN_SCRIPT = 36,
187        YI_SCRIPT = 37;
188
189    public static final byte // block code
190        RESERVED_BLOCK = 0,
191        BASIC_LATIN = 1,
192        LATIN_1_SUPPLEMENT = 2,
193        LATIN_EXTENDED_A = 3,
194        LATIN_EXTENDED_B = 4,
195        IPA_EXTENSIONS = 5,
196        SPACING_MODIFIER_LETTERS = 6,
197        COMBINING_DIACRITICAL_MARKS = 7,
198        GREEK = 8,
199        CYRILLIC = 9,
200        ARMENIAN = 10,
201        HEBREW = 11,
202        ARABIC = 12,
203        SYRIAC = 13,
204        THAANA = 14,
205        DEVANAGARI = 15,
206        BENGALI = 16,
207        GURMUKHI = 17,
208        GUJARATI = 18,
209        ORIYA = 19,
210        TAMIL = 20,
211        TELUGU = 21,
212        KANNADA = 22,
213        MALAYALAM = 23,
214        SINHALA = 24,
215        THAI = 25,
216        LAO = 26,
217        TIBETAN = 27,
218        MYANMAR = 28,
219        GEORGIAN = 29,
220        HANGUL_JAMO = 30,
221        ETHIOPIC = 31,
222        CHEROKEE = 32,
223        UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33,
224        OGHAM = 34,
225        RUNIC = 35,
226        KHMER = 36,
227        MONGOLIAN = 37,
228        LATIN_EXTENDED_ADDITIONAL = 38,
229        GREEK_EXTENDED = 39,
230        GENERAL_PUNCTUATION = 40,
231        SUPERSCRIPTS_AND_SUBSCRIPTS = 41,
232        CURRENCY_SYMBOLS = 42,
233        COMBINING_MARKS_FOR_SYMBOLS = 43,
234        LETTERLIKE_SYMBOLS = 44,
235        NUMBER_FORMS = 45,
236        ARROWS = 46,
237        MATHEMATICAL_OPERATORS = 47,
238        MISCELLANEOUS_TECHNICAL = 48,
239        CONTROL_PICTURES = 49,
240        OPTICAL_CHARACTER_RECOGNITION = 50,
241        ENCLOSED_ALPHANUMERICS = 51,
242        BOX_DRAWING = 52,
243        BLOCK_ELEMENTS = 53,
244        GEOMETRIC_SHAPES = 54,
245        MISCELLANEOUS_SYMBOLS = 55,
246        DINGBATS = 56,
247        BRAILLE_PATTERNS = 57,
248        CJK_RADICALS_SUPPLEMENT = 58,
249        KANGXI_RADICALS = 59,
250        IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60,
251        CJK_SYMBOLS_AND_PUNCTUATION = 61,
252        HIRAGANA = 62,
253        KATAKANA = 63,
254        BOPOMOFO = 64,
255        HANGUL_COMPATIBILITY_JAMO = 65,
256        KANBUN = 66,
257        BOPOMOFO_EXTENDED = 67,
258        ENCLOSED_CJK_LETTERS_AND_MONTHS = 68,
259        CJK_COMPATIBILITY = 69,
260        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70,
261        CJK_UNIFIED_IDEOGRAPHS = 71,
262        YI_SYLLABLES = 72,
263        YI_RADICALS = 73,
264        HANGUL_SYLLABLES = 74,
265        HIGH_SURROGATES = 75,
266        HIGH_PRIVATE_USE_SURROGATES = 76,
267        LOW_SURROGATES = 77,
268        PRIVATE_USE = 78,
269        CJK_COMPATIBILITY_IDEOGRAPHS = 79,
270        ALPHABETIC_PRESENTATION_FORMS = 80,
271        ARABIC_PRESENTATION_FORMS_A = 81,
272        COMBINING_HALF_MARKS = 82,
273        CJK_COMPATIBILITY_FORMS = 83,
274        SMALL_FORM_VARIANTS = 84,
275        ARABIC_PRESENTATION_FORMS_B = 85,
276        SPECIALS = 86,
277        HALFWIDTH_AND_FULLWIDTH_FORMS = 87;
278
279    static final byte[] blockToScript = {
280        COMMON_SCRIPT, // 0, <RESERVED_BLOCK>
281        LATIN_SCRIPT, // 1, BASIC_LATIN
282        LATIN_SCRIPT, // 2, LATIN_1_SUPPLEMENT
283        LATIN_SCRIPT, // 3, LATIN_EXTENDED_A
284        LATIN_SCRIPT, // 4, LATIN_EXTENDED_B
285        LATIN_SCRIPT, // 5, IPA_EXTENSIONS
286        COMMON_SCRIPT, // 6, SPACING_MODIFIER_LETTERS
287        COMMON_SCRIPT, // 7, COMBINING_DIACRITICAL_MARKS
288        GREEK_SCRIPT, // 8, GREEK
289        CYRILLIC_SCRIPT, // 9, CYRILLIC
290        ARMENIAN_SCRIPT, // 10, ARMENIAN
291        HEBREW_SCRIPT, // 11, HEBREW
292        ARABIC_SCRIPT, // 12, ARABIC
293        SYRIAC_SCRIPT, // 13, SYRIAC
294        THAANA_SCRIPT, // 14, THAANA
295        DEVANAGARI_SCRIPT, // 15, DEVANAGARI
296        BENGALI_SCRIPT, // 16, BENGALI
297        GURMUKHI_SCRIPT, // 17, GURMUKHI
298        GUJARATI_SCRIPT, // 18, GUJARATI
299        ORIYA_SCRIPT, // 19, ORIYA
300        TAMIL_SCRIPT, // 20, TAMIL
301        TELUGU_SCRIPT, // 21, TELUGU
302        KANNADA_SCRIPT, // 22, KANNADA
303        MALAYALAM_SCRIPT, // 23, MALAYALAM
304        SINHALA_SCRIPT, // 24, SINHALA
305        THAI_SCRIPT, // 25, THAI
306        LAO_SCRIPT, // 26, LAO
307        TIBETAN_SCRIPT, // 27, TIBETAN
308        MYANMAR_SCRIPT, // 28, MYANMAR
309        GEORGIAN_SCRIPT, // 29, GEORGIAN
310        JAMO_SCRIPT, // 30, HANGUL_JAMO
311        ETHIOPIC_SCRIPT, // 31, ETHIOPIC
312        CHEROKEE_SCRIPT, // 32, CHEROKEE
313        ABORIGINAL_SCRIPT, // 33, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
314        OGHAM_SCRIPT, // 34, OGHAM
315        RUNIC_SCRIPT, // 35, RUNIC
316        KHMER_SCRIPT, // 36, KHMER
317        MONGOLIAN_SCRIPT, // 37, MONGOLIAN
318        LATIN_SCRIPT, // 38, LATIN_EXTENDED_ADDITIONAL
319        GREEK_SCRIPT, // 39, GREEK_EXTENDED
320        COMMON_SCRIPT, // 40, GENERAL_PUNCTUATION
321        COMMON_SCRIPT, // 41, SUPERSCRIPTS_AND_SUBSCRIPTS
322        COMMON_SCRIPT, // 42, CURRENCY_SYMBOLS
323        COMMON_SCRIPT, // 43, COMBINING_MARKS_FOR_SYMBOLS
324        COMMON_SCRIPT, // 44, LETTERLIKE_SYMBOLS
325        COMMON_SCRIPT, // 45, NUMBER_FORMS
326        COMMON_SCRIPT, // 46, ARROWS
327        COMMON_SCRIPT, // 47, MATHEMATICAL_OPERATORS
328        COMMON_SCRIPT, // 48, MISCELLANEOUS_TECHNICAL
329        COMMON_SCRIPT, // 49, CONTROL_PICTURES
330        COMMON_SCRIPT, // 50, OPTICAL_CHARACTER_RECOGNITION
331        COMMON_SCRIPT, // 51, ENCLOSED_ALPHANUMERICS
332        COMMON_SCRIPT, // 52, BOX_DRAWING
333        COMMON_SCRIPT, // 53, BLOCK_ELEMENTS
334        COMMON_SCRIPT, // 54, GEOMETRIC_SHAPES
335        COMMON_SCRIPT, // 55, MISCELLANEOUS_SYMBOLS
336        COMMON_SCRIPT, // 56, DINGBATS
337        COMMON_SCRIPT, // 57, BRAILLE_PATTERNS
338        HAN_SCRIPT, // 58, CJK_RADICALS_SUPPLEMENT
339        HAN_SCRIPT, // 59, KANGXI_RADICALS
340        HAN_SCRIPT, // 60, IDEOGRAPHIC_DESCRIPTION_CHARACTERS
341        COMMON_SCRIPT, // 61, CJK_SYMBOLS_AND_PUNCTUATION
342        HIRAGANA_SCRIPT, // 62, HIRAGANA
343        KATAKANA_SCRIPT, // 63, KATAKANA
344        BOPOMOFO_SCRIPT, // 64, BOPOMOFO
345        JAMO_SCRIPT, // 65, HANGUL_COMPATIBILITY_JAMO
346        HAN_SCRIPT, // 66, KANBUN
347        BOPOMOFO_SCRIPT, // 67, BOPOMOFO_EXTENDED
348        COMMON_SCRIPT, // 68, ENCLOSED_CJK_LETTERS_AND_MONTHS
349        COMMON_SCRIPT, // 69, CJK_COMPATIBILITY
350        HAN_SCRIPT, // 70, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
351        HAN_SCRIPT, // 71, CJK_UNIFIED_IDEOGRAPHS
352        YI_SCRIPT, // 72, YI_SYLLABLES
353        YI_SCRIPT, // 73, YI_RADICALS
354        HANGUL_SCRIPT, // 74, HANGUL_SYLLABLES
355        COMMON_SCRIPT, // 75, HIGH_SURROGATES
356        COMMON_SCRIPT, // 76, HIGH_PRIVATE_USE_SURROGATES
357        COMMON_SCRIPT, // 77, LOW_SURROGATES
358        COMMON_SCRIPT, // 78, PRIVATE_USE
359        HAN_SCRIPT, // 79, CJK_COMPATIBILITY_IDEOGRAPHS
360        COMMON_SCRIPT, // 80, ALPHABETIC_PRESENTATION_FORMS
361        ARABIC_SCRIPT, // 81, ARABIC_PRESENTATION_FORMS_A
362        COMMON_SCRIPT, // 82, COMBINING_HALF_MARKS
363        COMMON_SCRIPT, // 83, CJK_COMPATIBILITY_FORMS
364        COMMON_SCRIPT, // 84, SMALL_FORM_VARIANTS
365        ARABIC_SCRIPT, // 85, ARABIC_PRESENTATION_FORMS_B
366        COMMON_SCRIPT, // 86, SPECIALS
367        COMMON_SCRIPT, // 87, HALFWIDTH_AND_FULLWIDTH_FORMS
368        COMMON_SCRIPT, // 88, SPECIALS
369    };
370
371    // could be further reduced to a byte array, but I didn't bother.
372    static final int[][] split = {
373        {0x0250, 4, 5}, // -1
374        {0x02B0, 5, 6}, // -2
375        {0x0370, 7, 8}, // -3
376        {0x0530, 0, 10}, // -4
377        {0x0590, 10, 11}, // -5
378        {0x0750, 13, 0}, // -6
379        {0x07C0, 14, 0}, // -7
380        {0x10A0, 28, 29}, // -8
381        {0x13A0, 0, 32}, // -9
382        {0x16A0, 34, 35}, // -10
383        {0x18B0, 37, 0}, // -11
384        {0x2070, 40, 41}, // -12
385        {0x20A0, 41, -31}, // -13
386        {0x2150, 44, 45}, // -14
387        {0x2190, 45, 46}, // -15
388        {0x2440, 49, -32}, // -16
389        {0x25A0, 53, 54}, // -17
390        {0x27C0, 56, 0}, // -18
391        {0x2FE0, 59, -33}, // -19
392        {0x3040, 61, 62}, // -20
393        {0x30A0, 62, 63}, // -21
394        {0x3130, 64, 65}, // -22
395        {0x3190, 65, -34}, // -23
396        {0x4DB6, 70, 0}, // -24
397        {0xA490, 72, -35}, // -25
398        {0xD7A4, 74, 0}, // -26
399        {0xFB50, 80, 81}, // -27
400        {0xFE20, 0, -36}, // -28
401        {0xFEFF, 85, 86}, // -29
402        {0xFFF0, 87, -37}, // -30
403        {0x20D0, 42, 43}, // -31
404        {0x2460, 50, 51}, // -32
405        {0x2FF0, 0, 60}, // -33
406        {0x31A0, 66, -38}, // -34
407        {0xA4D0, 73, 0}, //-35
408        {0xFE30, 82, -39}, //-36
409        {0xFFFE, 88, 0}, //-37
410        {0x31C0, 67, 0}, // -38
411        {0xFE50, 83, -40}, //-39
412        {0xFE70, 84, 85} // -40
413    };
414
415    static final byte[] charToBlock = {
416      1, 2, 3, 4, -1, -2, -3, 8, 9, 9, -4, -5, 12, 12, -6, -7,
417      0, 0, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 27,
418      28, -8, 30, 30, 31, 31, 31, -9, 33, 33, 33, 33, 33, -10, 0, 36,
419      37, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 38, 39, 39,
420      -12, -13, -14, -15, 47, 47, 48, 48, -16, 51, 52, -17, 55, 55, 56, -18,
421      57, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 59, -19,
422      -20, -21, -22, -23, 68, 68, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70,
423      70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
424      70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
425      70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, -24, 71, 71, 71, 71,
426      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
427      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
428      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
429      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
430      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
431      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
432      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
433      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
434      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
435      71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
436      72, 72, 72, 72, 72, 72, 72, 72, 72, -25, 0, 0, 0, 0, 0, 0,
437      0, 0, 0, 0, 0, 0, 0, 0, 74, 74, 74, 74, 74, 74, 74, 74,
438      74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
439      74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
440      74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
441      74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
442      74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, -26,
443      75, 75, 75, 75, 75, 75, 75, 76, 77, 77, 77, 77, 77, 77, 77, 77,
444      78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
445      78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
446      78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
447      78, 78, 79, 79, 79, 79, -27, 81, 81, 81, 81, 81, -28, -29, 87, -30
448    };
449    */
450}
451