1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.apache.harmony.regex.tests.java.util.regex;
18
19import java.util.regex.Matcher;
20import java.util.regex.Pattern;
21import java.util.regex.PatternSyntaxException;
22
23import junit.framework.TestCase;
24
25/**
26 * Tests simple Pattern compilation and Matcher methods
27 *
28 */
29public class Pattern2Test extends TestCase {
30
31    public void testSimpleMatch() throws PatternSyntaxException {
32        Pattern p = Pattern.compile("foo.*");
33
34        Matcher m1 = p.matcher("foo123");
35        assertTrue(m1.matches());
36        assertTrue(m1.find(0));
37        assertTrue(m1.lookingAt());
38
39        Matcher m2 = p.matcher("fox");
40        assertFalse(m2.matches());
41        assertFalse(m2.find(0));
42        assertFalse(m2.lookingAt());
43
44        assertTrue(Pattern.matches("foo.*", "foo123"));
45        assertFalse(Pattern.matches("foo.*", "fox"));
46
47        assertFalse(Pattern.matches("bar", "foobar"));
48
49        assertTrue(Pattern.matches("", ""));
50    }
51    public void testCursors() {
52        Pattern p;
53        Matcher m;
54
55        try {
56            p = Pattern.compile("foo");
57
58            m = p.matcher("foobar");
59            assertTrue(m.find());
60            assertEquals(0, m.start());
61            assertEquals(3, m.end());
62            assertFalse(m.find());
63
64            // Note: also testing reset here
65            m.reset();
66            assertTrue(m.find());
67            assertEquals(0, m.start());
68            assertEquals(3, m.end());
69            assertFalse(m.find());
70
71            m.reset("barfoobar");
72            assertTrue(m.find());
73            assertEquals(3, m.start());
74            assertEquals(6, m.end());
75            assertFalse(m.find());
76
77            m.reset("barfoo");
78            assertTrue(m.find());
79            assertEquals(3, m.start());
80            assertEquals(6, m.end());
81            assertFalse(m.find());
82
83            m.reset("foobarfoobarfoo");
84            assertTrue(m.find());
85            assertEquals(0, m.start());
86            assertEquals(3, m.end());
87            assertTrue(m.find());
88            assertEquals(6, m.start());
89            assertEquals(9, m.end());
90            assertTrue(m.find());
91            assertEquals(12, m.start());
92            assertEquals(15, m.end());
93            assertFalse(m.find());
94            assertTrue(m.find(0));
95            assertEquals(0, m.start());
96            assertEquals(3, m.end());
97            assertTrue(m.find(4));
98            assertEquals(6, m.start());
99            assertEquals(9, m.end());
100        } catch (PatternSyntaxException e) {
101            System.out.println(e.getMessage());
102            fail();
103        }
104    }
105    public void testGroups() throws PatternSyntaxException {
106        Pattern p;
107        Matcher m;
108
109        p = Pattern.compile("(p[0-9]*)#?(q[0-9]*)");
110
111        m = p.matcher("p1#q3p2q42p5p71p63#q888");
112        assertTrue(m.find());
113        assertEquals(0, m.start());
114        assertEquals(5, m.end());
115        assertEquals(2, m.groupCount());
116        assertEquals(0, m.start(0));
117        assertEquals(5, m.end(0));
118        assertEquals(0, m.start(1));
119        assertEquals(2, m.end(1));
120        assertEquals(3, m.start(2));
121        assertEquals(5, m.end(2));
122        assertEquals("p1#q3", m.group());
123        assertEquals("p1#q3", m.group(0));
124        assertEquals("p1", m.group(1));
125        assertEquals("q3", m.group(2));
126
127        assertTrue(m.find());
128        assertEquals(5, m.start());
129        assertEquals(10, m.end());
130        assertEquals(2, m.groupCount());
131        assertEquals(10, m.end(0));
132        assertEquals(5, m.start(1));
133        assertEquals(7, m.end(1));
134        assertEquals(7, m.start(2));
135        assertEquals(10, m.end(2));
136        assertEquals("p2q42", m.group());
137        assertEquals("p2q42", m.group(0));
138        assertEquals("p2", m.group(1));
139        assertEquals("q42", m.group(2));
140
141        assertTrue(m.find());
142        assertEquals(15, m.start());
143        assertEquals(23, m.end());
144        assertEquals(2, m.groupCount());
145        assertEquals(15, m.start(0));
146        assertEquals(23, m.end(0));
147        assertEquals(15, m.start(1));
148        assertEquals(18, m.end(1));
149        assertEquals(19, m.start(2));
150        assertEquals(23, m.end(2));
151        assertEquals("p63#q888", m.group());
152        assertEquals("p63#q888", m.group(0));
153        assertEquals("p63", m.group(1));
154        assertEquals("q888", m.group(2));
155        assertFalse(m.find());
156    }
157
158    public void testReplace() throws PatternSyntaxException {
159        Pattern p;
160        Matcher m;
161
162        // Note: examples from book,
163        // Hitchens, Ron, 2002, "Java NIO", O'Reilly, page 171
164        p = Pattern.compile("a*b");
165
166        m = p.matcher("aabfooaabfooabfoob");
167        assertTrue(m.replaceAll("-").equals("-foo-foo-foo-"));
168        assertTrue(m.replaceFirst("-").equals("-fooaabfooabfoob"));
169
170        /*
171         * p = Pattern.compile ("\\p{Blank}");
172         *
173         * m = p.matcher ("fee fie foe fum"); assertTrue
174         * (m.replaceFirst("-").equals ("fee-fie foe fum")); assertTrue
175         * (m.replaceAll("-").equals ("fee-fie-foe-fum"));
176         */
177
178        p = Pattern.compile("([bB])yte");
179
180        m = p.matcher("Byte for byte");
181        assertTrue(m.replaceFirst("$1ite").equals("Bite for byte"));
182        assertTrue(m.replaceAll("$1ite").equals("Bite for bite"));
183
184        p = Pattern.compile("\\d\\d\\d\\d([- ])");
185
186        m = p.matcher("card #1234-5678-1234");
187        assertTrue(m.replaceFirst("xxxx$1").equals("card #xxxx-5678-1234"));
188        assertTrue(m.replaceAll("xxxx$1").equals("card #xxxx-xxxx-1234"));
189
190        p = Pattern.compile("(up|left)( *)(right|down)");
191
192        m = p.matcher("left right, up down");
193        assertTrue(m.replaceFirst("$3$2$1").equals("right left, up down"));
194        assertTrue(m.replaceAll("$3$2$1").equals("right left, down up"));
195
196        p = Pattern.compile("([CcPp][hl]e[ea]se)");
197
198        m = p.matcher("I want cheese. Please.");
199        assertTrue(m.replaceFirst("<b> $1 </b>").equals(
200                "I want <b> cheese </b>. Please."));
201        assertTrue(m.replaceAll("<b> $1 </b>").equals(
202                "I want <b> cheese </b>. <b> Please </b>."));
203    }
204
205    public void testEscapes() throws PatternSyntaxException {
206        Pattern p;
207        Matcher m;
208
209        // Test \\ sequence
210        p = Pattern.compile("([a-z]+)\\\\([a-z]+);");
211        m = p.matcher("fred\\ginger;abbott\\costello;jekell\\hyde;");
212        assertTrue(m.find());
213        assertEquals("fred", m.group(1));
214        assertEquals("ginger", m.group(2));
215        assertTrue(m.find());
216        assertEquals("abbott", m.group(1));
217        assertEquals("costello", m.group(2));
218        assertTrue(m.find());
219        assertEquals("jekell", m.group(1));
220        assertEquals("hyde", m.group(2));
221        assertFalse(m.find());
222
223        // Test \n, \t, \r, \f, \e, \a sequences
224        p = Pattern.compile("([a-z]+)[\\n\\t\\r\\f\\e\\a]+([a-z]+)");
225        m = p.matcher("aa\nbb;cc\u0009\rdd;ee\u000C\u001Bff;gg\n\u0007hh");
226        assertTrue(m.find());
227        assertEquals("aa", m.group(1));
228        assertEquals("bb", m.group(2));
229        assertTrue(m.find());
230        assertEquals("cc", m.group(1));
231        assertEquals("dd", m.group(2));
232        assertTrue(m.find());
233        assertEquals("ee", m.group(1));
234        assertEquals("ff", m.group(2));
235        assertTrue(m.find());
236        assertEquals("gg", m.group(1));
237        assertEquals("hh", m.group(2));
238        assertFalse(m.find());
239
240        // Test \\u and \\x sequences
241/*        p = Pattern.compile("([0-9]+)[\\u0020:\\x21];");
242        m = p.matcher("11:;22 ;33-;44!;");
243        assertTrue(m.find());
244        assertEquals("11", m.group(1));
245        assertTrue(m.find());
246        assertEquals("22", m.group(1));
247        assertTrue(m.find());
248        assertEquals("44", m.group(1));
249        assertFalse(m.find());
250*/
251        // Test invalid unicode sequences
252/*        try {
253            p = Pattern.compile("\\u");
254            fail("PatternSyntaxException expected");
255        } catch (PatternSyntaxException e) {
256        }
257
258        try {
259            p = Pattern.compile("\\u;");
260            fail("PatternSyntaxException expected");
261        } catch (PatternSyntaxException e) {
262        }
263
264        try {
265            p = Pattern.compile("\\u002");
266            fail("PatternSyntaxException expected");
267        } catch (PatternSyntaxException e) {
268        }
269
270        try {
271            p = Pattern.compile("\\u002;");
272            fail("PatternSyntaxException expected");
273        } catch (PatternSyntaxException e) {
274        }
275
276        // Test invalid hex sequences
277        try {
278            p = Pattern.compile("\\x");
279            fail("PatternSyntaxException expected");
280        } catch (PatternSyntaxException e) {
281        }
282
283        try {
284            p = Pattern.compile("\\x;");
285            fail("PatternSyntaxException expected");
286        } catch (PatternSyntaxException e) {
287        }
288
289        try {
290            p = Pattern.compile("\\xa");
291            fail("PatternSyntaxException expected");
292        } catch (PatternSyntaxException e) {
293        }
294
295        try {
296            p = Pattern.compile("\\xa;");
297            fail("PatternSyntaxException expected");
298        } catch (PatternSyntaxException e) {
299        }
300*/
301        // Test \0 (octal) sequences (1, 2 and 3 digit)
302        p = Pattern.compile("([0-9]+)[\\07\\040\\0160];");
303        m = p.matcher("11\u0007;22:;33 ;44p;");
304        assertTrue(m.find());
305        assertEquals("11", m.group(1));
306        assertTrue(m.find());
307        assertEquals("33", m.group(1));
308        assertTrue(m.find());
309        assertEquals("44", m.group(1));
310        assertFalse(m.find());
311
312        // Test invalid octal sequences
313        try {
314            p = Pattern.compile("\\08");
315            fail("PatternSyntaxException expected");
316        } catch (PatternSyntaxException e) {
317        }
318
319        //originally contributed test did not check the result
320        //TODO: check what RI does here
321//        try {
322//            p = Pattern.compile("\\0477");
323//            fail("PatternSyntaxException expected");
324//        } catch (PatternSyntaxException e) {
325//        }
326
327        try {
328            p = Pattern.compile("\\0");
329            fail("PatternSyntaxException expected");
330        } catch (PatternSyntaxException e) {
331        }
332
333        try {
334            p = Pattern.compile("\\0;");
335            fail("PatternSyntaxException expected");
336        } catch (PatternSyntaxException e) {
337        }
338
339
340        // Test \c (control character) sequence
341        p = Pattern.compile("([0-9]+)[\\cA\\cB\\cC\\cD];");
342        m = p.matcher("11\u0001;22:;33\u0002;44p;55\u0003;66\u0004;");
343        assertTrue(m.find());
344        assertEquals("11", m.group(1));
345        assertTrue(m.find());
346        assertEquals("33", m.group(1));
347        assertTrue(m.find());
348        assertEquals("55", m.group(1));
349        assertTrue(m.find());
350        assertEquals("66", m.group(1));
351        assertFalse(m.find());
352
353        // More thorough control escape test
354        // Ensure that each escape matches exactly the corresponding
355        // character
356        // code and no others (well, from 0-255 at least)
357        int i, j;
358        for (i = 0; i < 26; i++) {
359            p = Pattern.compile("\\c" + Character.toString((char) ('A' + i)));
360            int match_char = -1;
361            for (j = 0; j < 255; j++) {
362                m = p.matcher(Character.toString((char) j));
363                if (m.matches()) {
364                    assertEquals(-1, match_char);
365                    match_char = j;
366                }
367            }
368            assertTrue(match_char == i + 1);
369        }
370
371        // Test invalid control escapes
372// BEGIN android-removed
373// ICU doesn't complain about illegal control sequences
374//        try {
375//            p = Pattern.compile("\\c");
376//            fail("PatternSyntaxException expected");
377//        } catch (PatternSyntaxException e) {
378//        }
379// END android-removed
380
381        //originally contributed test did not check the result
382        //TODO: check what RI does here
383//        try {
384//            p = Pattern.compile("\\c;");
385//            fail("PatternSyntaxException expected");
386//        } catch (PatternSyntaxException e) {
387//        }
388//
389//        try {
390//            p = Pattern.compile("\\ca;");
391//            fail("PatternSyntaxException expected");
392//        } catch (PatternSyntaxException e) {
393//        }
394//
395//        try {
396//            p = Pattern.compile("\\c4;");
397//            fail("PatternSyntaxException expected");
398//        } catch (PatternSyntaxException e) {
399//        }
400    }
401    public void testCharacterClasses() throws PatternSyntaxException {
402        Pattern p;
403        Matcher m;
404
405        // Test one character range
406        p = Pattern.compile("[p].*[l]");
407        m = p.matcher("paul");
408        assertTrue(m.matches());
409        m = p.matcher("pool");
410        assertTrue(m.matches());
411        m = p.matcher("pong");
412        assertFalse(m.matches());
413        m = p.matcher("pl");
414        assertTrue(m.matches());
415
416        // Test two character range
417        p = Pattern.compile("[pm].*[lp]");
418        m = p.matcher("prop");
419        assertTrue(m.matches());
420        m = p.matcher("mall");
421        assertTrue(m.matches());
422        m = p.matcher("pong");
423        assertFalse(m.matches());
424        m = p.matcher("pill");
425        assertTrue(m.matches());
426
427        // Test range including [ and ]
428        p = Pattern.compile("[<\\[].*[\\]>]");
429        m = p.matcher("<foo>");
430        assertTrue(m.matches());
431        m = p.matcher("[bar]");
432        assertTrue(m.matches());
433        m = p.matcher("{foobar]");
434        assertFalse(m.matches());
435        m = p.matcher("<pill]");
436        assertTrue(m.matches());
437
438        // Test range using ^
439        p = Pattern.compile("[^bc][a-z]+[tr]");
440        m = p.matcher("pat");
441        assertTrue(m.matches());
442        m = p.matcher("liar");
443        assertTrue(m.matches());
444        m = p.matcher("car");
445        assertFalse(m.matches());
446        m = p.matcher("gnat");
447        assertTrue(m.matches());
448
449        // Test character range using -
450        p = Pattern.compile("[a-z]_+[a-zA-Z]-+[0-9p-z]");
451        m = p.matcher("d__F-8");
452        assertTrue(m.matches());
453        m = p.matcher("c_a-q");
454        assertTrue(m.matches());
455        m = p.matcher("a__R-a");
456        assertFalse(m.matches());
457        m = p.matcher("r_____d-----5");
458        assertTrue(m.matches());
459
460        // Test range using unicode characters and unicode and hex escapes
461        p = Pattern.compile("[\\u1234-\\u2345]_+[a-z]-+[\u0001-\\x11]");
462        m = p.matcher("\u2000_q-\u0007");
463        assertTrue(m.matches());
464        m = p.matcher("\u1234_z-\u0001");
465        assertTrue(m.matches());
466        m = p.matcher("r_p-q");
467        assertFalse(m.matches());
468        m = p.matcher("\u2345_____d-----\n");
469        assertTrue(m.matches());
470
471// BEGIN android-removed
472// The "---" collides with ICU's "--" operator and is likely to be a user error
473// anyway, so we simply comment this one out.
474//        // Test ranges including the "-" character
475//        p = Pattern.compile("[\\*-/]_+[---]!+[--AP]");
476//        m = p.matcher("-_-!!A");
477//        assertTrue(m.matches());
478//        m = p.matcher("\u002b_-!!!-");
479//        assertTrue(m.matches());
480//        m = p.matcher("!_-!@");
481//        assertFalse(m.matches());
482//        m = p.matcher(",______-!!!!!!!P");
483//        assertTrue(m.matches());
484// END android-removed
485
486        // Test nested ranges
487        p = Pattern.compile("[pm[t]][a-z]+[[r]lp]");
488        m = p.matcher("prop");
489        assertTrue(m.matches());
490        m = p.matcher("tsar");
491        assertTrue(m.matches());
492        m = p.matcher("pong");
493        assertFalse(m.matches());
494        m = p.matcher("moor");
495        assertTrue(m.matches());
496
497        // Test character class intersection with &&
498        // TODO: figure out what x&&y or any class with a null intersection
499        // set (like [[a-c]&&[d-f]]) might mean. It doesn't mean "match
500        // nothing" and doesn't mean "match anything" so I'm stumped.
501        p = Pattern.compile("[[a-p]&&[g-z]]+-+[[a-z]&&q]-+[x&&[a-z]]-+");
502        m = p.matcher("h--q--x--");
503        assertTrue(m.matches());
504        m = p.matcher("hog--q-x-");
505        assertTrue(m.matches());
506        m = p.matcher("ape--q-x-");
507        assertFalse(m.matches());
508        m = p.matcher("mop--q-x----");
509        assertTrue(m.matches());
510
511        // Test error cases with &&
512// BEGIN android-removed
513// This is more of a bug, and ICU doesn't have this behavior.
514//            p = Pattern.compile("[&&[xyz]]");
515//            m = p.matcher("&");
516//            // System.out.println(m.matches());
517//            m = p.matcher("x");
518//            // System.out.println(m.matches());
519//            m = p.matcher("y");
520//            // System.out.println(m.matches());
521// END android-removed
522            p = Pattern.compile("[[xyz]&[axy]]");
523            m = p.matcher("x");
524            // System.out.println(m.matches());
525            m = p.matcher("z");
526            // System.out.println(m.matches());
527            m = p.matcher("&");
528            // System.out.println(m.matches());
529            p = Pattern.compile("[abc[123]&&[345]def]");
530            m = p.matcher("a");
531            // System.out.println(m.matches());
532
533// BEGIN android-removed
534// This is more of a bug, and ICU doesn't have this behavior.
535//            p = Pattern.compile("[[xyz]&&]");
536// END android-removed
537            p = Pattern.compile("[[abc]&]");
538
539        try {
540            p = Pattern.compile("[[abc]&&");
541            fail("PatternSyntaxException expected");
542        } catch (PatternSyntaxException e) {
543        }
544
545        p = Pattern.compile("[[abc]\\&&[xyz]]");
546
547        p = Pattern.compile("[[abc]&\\&[xyz]]");
548
549        // Test 3-way intersection
550        p = Pattern.compile("[[a-p]&&[g-z]&&[d-k]]");
551        m = p.matcher("g");
552        assertTrue(m.matches());
553        m = p.matcher("m");
554        assertFalse(m.matches());
555
556        // Test nested intersection
557        p = Pattern.compile("[[[a-p]&&[g-z]]&&[d-k]]");
558        m = p.matcher("g");
559        assertTrue(m.matches());
560        m = p.matcher("m");
561        assertFalse(m.matches());
562
563        // Test character class subtraction with && and ^
564        p = Pattern.compile("[[a-z]&&[^aeiou]][aeiou][[^xyz]&&[a-z]]");
565        m = p.matcher("pop");
566        assertTrue(m.matches());
567        m = p.matcher("tag");
568        assertTrue(m.matches());
569        m = p.matcher("eat");
570        assertFalse(m.matches());
571        m = p.matcher("tax");
572        assertFalse(m.matches());
573        m = p.matcher("zip");
574        assertTrue(m.matches());
575
576        // Test . (DOT), with and without DOTALL
577        // Note: DOT not allowed in character classes
578        p = Pattern.compile(".+/x.z");
579        m = p.matcher("!$/xyz");
580        assertTrue(m.matches());
581        m = p.matcher("%\n\r/x\nz");
582        assertFalse(m.matches());
583        p = Pattern.compile(".+/x.z", Pattern.DOTALL);
584        m = p.matcher("%\n\r/x\nz");
585        assertTrue(m.matches());
586
587        // Test \d (digit)
588        p = Pattern.compile("\\d+[a-z][\\dx]");
589        m = p.matcher("42a6");
590        assertTrue(m.matches());
591        m = p.matcher("21zx");
592        assertTrue(m.matches());
593        m = p.matcher("ab6");
594        assertFalse(m.matches());
595        m = p.matcher("56912f9");
596        assertTrue(m.matches());
597
598        // Test \D (not a digit)
599        p = Pattern.compile("\\D+[a-z]-[\\D3]");
600        m = p.matcher("za-p");
601        assertTrue(m.matches());
602        m = p.matcher("%!e-3");
603        assertTrue(m.matches());
604        m = p.matcher("9a-x");
605        assertFalse(m.matches());
606        m = p.matcher("\u1234pp\ny-3");
607        assertTrue(m.matches());
608
609        // Test \s (whitespace)
610        p = Pattern.compile("<[a-zA-Z]+\\s+[0-9]+[\\sx][^\\s]>");
611        m = p.matcher("<cat \t1\fx>");
612        assertTrue(m.matches());
613        m = p.matcher("<cat \t1\f >");
614        assertFalse(m.matches());
615        m = p
616                .matcher("xyz <foo\n\r22 5> <pp \t\n\f\r \u000b41x\u1234><pp \nx7\rc> zzz");
617        assertTrue(m.find());
618        assertTrue(m.find());
619        assertFalse(m.find());
620
621        // Test \S (not whitespace)
622        p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221>");
623        m = p.matcher("<f $0**\n** 221>");
624        assertTrue(m.matches());
625        m = p.matcher("<x 441\t221>");
626        assertTrue(m.matches());
627        m = p.matcher("<z \t9\ng 221>");
628        assertFalse(m.matches());
629        m = p.matcher("<z 60\ngg\u1234\f221>");
630        assertTrue(m.matches());
631        p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221[\\S&&[^abc]]>");
632        m = p.matcher("<f $0**\n** 221x>");
633        assertTrue(m.matches());
634        m = p.matcher("<x 441\t221z>");
635        assertTrue(m.matches());
636        m = p.matcher("<x 441\t221 >");
637        assertFalse(m.matches());
638        m = p.matcher("<x 441\t221c>");
639        assertFalse(m.matches());
640        m = p.matcher("<z \t9\ng 221x>");
641        assertFalse(m.matches());
642        m = p.matcher("<z 60\ngg\u1234\f221\u0001>");
643        assertTrue(m.matches());
644
645        // Test \w (ascii word)
646        p = Pattern.compile("<\\w+\\s[0-9]+;[^\\w]\\w+/[\\w$]+;");
647        m = p.matcher("<f1 99;!foo5/a$7;");
648        assertTrue(m.matches());
649        m = p.matcher("<f$ 99;!foo5/a$7;");
650        assertFalse(m.matches());
651        m = p
652                .matcher("<abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789 99;!foo5/a$7;");
653        assertTrue(m.matches());
654
655        // Test \W (not an ascii word)
656        p = Pattern.compile("<\\W\\w+\\s[0-9]+;[\\W_][^\\W]+\\s[0-9]+;");
657        m = p.matcher("<$foo3\n99;_bar\t0;");
658        assertTrue(m.matches());
659        m = p.matcher("<hh 99;_g 0;");
660        assertFalse(m.matches());
661        m = p.matcher("<*xx\t00;^zz\f11;");
662        assertTrue(m.matches());
663
664        // Test x|y pattern
665        // TODO
666    }
667    public void testPOSIXGroups() throws PatternSyntaxException {
668        Pattern p;
669        Matcher m;
670
671        // Test POSIX groups using \p and \P (in the group and not in the group)
672        // Groups are Lower, Upper, ASCII, Alpha, Digit, XDigit, Alnum, Punct,
673        // Graph, Print, Blank, Space, Cntrl
674        // Test \p{Lower}
675        /*
676         * FIXME: Requires complex range processing
677         * p = Pattern.compile("<\\p{Lower}\\d\\P{Lower}:[\\p{Lower}Z]\\s[^\\P{Lower}]>");
678         * m = p.matcher("<a4P:g x>"); assertTrue(m.matches()); m =
679         * p.matcher("<p4%:Z\tq>"); assertTrue(m.matches()); m =
680         * p.matcher("<A6#:e e>"); assertFalse(m.matches());
681         */
682        p = Pattern.compile("\\p{Lower}+");
683        m = p.matcher("abcdefghijklmnopqrstuvwxyz");
684        assertTrue(m.matches());
685
686        // Invalid uses of \p{Lower}
687        try {
688            p = Pattern.compile("\\p");
689            fail("PatternSyntaxException expected");
690        } catch (PatternSyntaxException e) {
691        }
692
693        try {
694            p = Pattern.compile("\\p;");
695            fail("PatternSyntaxException expected");
696        } catch (PatternSyntaxException e) {
697        }
698
699        try {
700            p = Pattern.compile("\\p{");
701            fail("PatternSyntaxException expected");
702        } catch (PatternSyntaxException e) {
703        }
704
705        try {
706            p = Pattern.compile("\\p{;");
707            fail("PatternSyntaxException expected");
708        } catch (PatternSyntaxException e) {
709        }
710
711        try {
712            p = Pattern.compile("\\p{Lower");
713            fail("PatternSyntaxException expected");
714        } catch (PatternSyntaxException e) {
715        }
716
717        try {
718            p = Pattern.compile("\\p{Lower;");
719            fail("PatternSyntaxException expected");
720        } catch (PatternSyntaxException e) {
721        }
722
723        // Test \p{Upper}
724        /*
725         * FIXME: Requires complex range processing
726         * p = Pattern.compile("<\\p{Upper}\\d\\P{Upper}:[\\p{Upper}z]\\s[^\\P{Upper}]>");
727         * m = p.matcher("<A4p:G X>"); assertTrue(m.matches()); m =
728         * p.matcher("<P4%:z\tQ>"); assertTrue(m.matches()); m =
729         * p.matcher("<a6#:E E>"); assertFalse(m.matches());
730         */
731        p = Pattern.compile("\\p{Upper}+");
732        m = p.matcher("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
733        assertTrue(m.matches());
734
735        // Invalid uses of \p{Upper}
736        try {
737            p = Pattern.compile("\\p{Upper");
738            fail("PatternSyntaxException expected");
739        } catch (PatternSyntaxException e) {
740        }
741
742        try {
743            p = Pattern.compile("\\p{Upper;");
744            fail("PatternSyntaxException expected");
745        } catch (PatternSyntaxException e) {
746        }
747
748        // Test \p{ASCII}
749        /*
750         * FIXME: Requires complex range processing p = Pattern.compile("<\\p{ASCII}\\d\\P{ASCII}:[\\p{ASCII}\u1234]\\s[^\\P{ASCII}]>");
751         * m = p.matcher("<A4\u0080:G X>"); assertTrue(m.matches()); m =
752         * p.matcher("<P4\u00ff:\u1234\t\n>"); assertTrue(m.matches()); m =
753         * p.matcher("<\u00846#:E E>"); assertFalse(m.matches())
754         */
755        int i;
756        p = Pattern.compile("\\p{ASCII}");
757        for (i = 0; i < 0x80; i++) {
758            m = p.matcher(Character.toString((char) i));
759            assertTrue(m.matches());
760        }
761        for (; i < 0xff; i++) {
762            m = p.matcher(Character.toString((char) i));
763            assertFalse(m.matches());
764        }
765
766        // Invalid uses of \p{ASCII}
767        try {
768            p = Pattern.compile("\\p{ASCII");
769            fail("PatternSyntaxException expected");
770        } catch (PatternSyntaxException e) {
771        }
772
773        try {
774            p = Pattern.compile("\\p{ASCII;");
775            fail("PatternSyntaxException expected");
776        } catch (PatternSyntaxException e) {
777        }
778
779        // Test \p{Alpha}
780        // TODO
781
782        // Test \p{Digit}
783        // TODO
784
785        // Test \p{XDigit}
786        // TODO
787
788        // Test \p{Alnum}
789        // TODO
790
791        // Test \p{Punct}
792        // TODO
793
794        // Test \p{Graph}
795        // TODO
796
797        // Test \p{Print}
798        // TODO
799
800        // Test \p{Blank}
801        // TODO
802
803        // Test \p{Space}
804        // TODO
805
806        // Test \p{Cntrl}
807        // TODO
808    }
809    public void testUnicodeCategories() throws PatternSyntaxException {
810        // Test Unicode categories using \p and \P
811        // One letter codes: L, M, N, P, S, Z, C
812        // Two letter codes: Lu, Nd, Sc, Sm, ...
813        // See java.lang.Character and Unicode standard for complete list
814        // TODO
815        // Test \p{L}
816        // TODO
817
818        // Test \p{N}
819        // TODO
820
821        // Test two letter codes:
822        // From unicode.org:
823        // Lu
824        // Ll
825        // Lt
826        // Lm
827        // Lo
828        // Mn
829        // Mc
830        // Me
831        // Nd
832        // Nl
833        // No
834        // Pc
835        // Pd
836        // Ps
837        // Pe
838        // Pi
839        // Pf
840        // Po
841        // Sm
842        // Sc
843        // Sk
844        // So
845        // Zs
846        // Zl
847        // Zp
848        // Cc
849        // Cf
850        // Cs
851        // Co
852        // Cn
853
854        // TODO add more tests per category
855        //{"Cc", "\u0000", "-\u0041"},
856        testCategory("Cf", "\u202B");
857        testCategory("Co", "\uE000");
858        testCategory("Cs", "\uD800");
859        testCategory("Ll", "a", "b", "x", "y", "z", "-A", "-Z");
860        testCategory("Lm", "\u02B9");
861        testCategory("Lu", "B", "C", "-c");
862        testCategory("Lo", "\u05E2");
863        testCategory("Lt", "\u01C5");
864        testCategory("Mc", "\u0903");
865        testCategory("Me", "\u0488");
866        testCategory("Mn", "\u0300");
867        testCategory("Nd", "\u0030");
868        testCategory("Nl", "\u2164");
869        testCategory("No", "\u0BF0");
870        // testCategory("Pc", "\u30FB");
871        testCategory("Pd", "\u2015");
872        testCategory("Pe", "\u207E");
873        testCategory("Po", "\u00B7");
874        testCategory("Ps", "\u0F3C");
875        testCategory("Sc", "\u20A0");
876        testCategory("Sk", "\u00B8");
877        testCategory("Sm", "\u002B");
878        testCategory("So", "\u0B70");
879        testCategory("Zl", "\u2028");
880        // testCategory("Pi", "\u200C");
881        testCategory("Zp", "\u2029");
882    }
883
884    private void testCategory(String cat, String... matches) {
885        String pa = "{"+cat+"}";
886        String pat = "\\p"+pa;
887        String npat = "\\P"+pa;
888        Pattern p = Pattern.compile(pat);
889        Pattern pn = Pattern.compile(npat);
890        for (int j = 0; j < matches.length; j++) {
891            String t = matches[j];
892            boolean invert = t.startsWith("-");
893            if (invert) {
894                // test negative case, expected to fail
895                t = t.substring(1);
896                assertFalse("expected '"+t+"' to not be matched " +
897                        "by pattern '"+pat, p.matcher(t).matches());
898                assertTrue("expected '"+t+"' to  " +
899                        "be matched by pattern '"+npat, pn.matcher(t).matches());
900            } else {
901                assertTrue("expected '"+t+"' to be matched " +
902                        "by pattern '"+pat, p.matcher(t).matches());
903                assertFalse("expected '"+t+"' to  " +
904                        "not be matched by pattern '"+npat, pn.matcher(t).matches());
905            }
906        }
907    }
908
909    public void testUnicodeBlocks() throws PatternSyntaxException {
910        Pattern p;
911        Matcher m;
912        int i, j;
913
914        // Test Unicode blocks using \p and \P
915        // FIXME:
916        // Note that LatinExtended-B and ArabicPresentations-B are unrecognized
917        // by the reference JDK.
918        for (i = 0; i < UBlocks.length; i++) {
919            /*
920             * p = Pattern.compile("\\p{"+UBlocks[i].name+"}");
921             *
922             * if (UBlocks[i].low > 0) { m =
923             * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
924             * assertFalse(m.matches()); } for (j=UBlocks[i].low; j <=
925             * UBlocks[i].high; j++) { m =
926             * p.matcher(Character.toString((char)j));
927             * assertTrue(m.matches()); } if (UBlocks[i].high < 0xFFFF) { m =
928             * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
929             * assertFalse(m.matches()); }
930             *
931             * p = Pattern.compile("\\P{"+UBlocks[i].name+"}");
932             *
933             * if (UBlocks[i].low > 0) { m =
934             * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
935             * assertTrue(m.matches()); } for (j=UBlocks[i].low; j <
936             * UBlocks[i].high; j++) { m =
937             * p.matcher(Character.toString((char)j));
938             * assertFalse(m.matches()); } if (UBlocks[i].high < 0xFFFF) { m =
939             * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
940             * assertTrue(m.matches()); }
941             */
942
943            p = Pattern.compile("\\p{In" + UBlocks[i].name + "}");
944// BEGIN android-changed
945// Added the name of the block under test to the assertion to get more output.
946
947            if (UBlocks[i].low > 0) {
948                m = p.matcher(Character.toString((char) (UBlocks[i].low - 1)));
949                assertFalse(UBlocks[i].name, m.matches());
950            }
951            for (j = UBlocks[i].low; j <= UBlocks[i].high; j++) {
952                m = p.matcher(Character.toString((char) j));
953                assertTrue(UBlocks[i].name, m.matches());
954            }
955            if (UBlocks[i].high < 0xFFFF) {
956                m = p.matcher(Character.toString((char) (UBlocks[i].high + 1)));
957                assertFalse(UBlocks[i].name, m.matches());
958            }
959
960            p = Pattern.compile("\\P{In" + UBlocks[i].name + "}");
961
962            if (UBlocks[i].low > 0) {
963                m = p.matcher(Character.toString((char) (UBlocks[i].low - 1)));
964                assertTrue(UBlocks[i].name, m.matches());
965            }
966            for (j = UBlocks[i].low; j < UBlocks[i].high; j++) {
967                m = p.matcher(Character.toString((char) j));
968                assertFalse(UBlocks[i].name, m.matches());
969            }
970            if (UBlocks[i].high < 0xFFFF) {
971                m = p.matcher(Character.toString((char) (UBlocks[i].high + 1)));
972                assertTrue(UBlocks[i].name, m.matches());
973            }
974
975// END android-changed
976        }
977    }
978    public void testCapturingGroups() throws PatternSyntaxException {
979        Pattern p;
980        Matcher m;
981
982        // Test simple capturing groups
983        p = Pattern.compile("(a+)b");
984        m = p.matcher("aaaaaaaab");
985        assertTrue(m.matches());
986        assertEquals(1, m.groupCount());
987        assertEquals("aaaaaaaa", m.group(1));
988
989        p = Pattern.compile("((an)+)((as)+)");
990        m = p.matcher("ananas");
991        assertTrue(m.matches());
992        assertEquals(4, m.groupCount());
993        assertEquals("ananas", m.group(0));
994        assertEquals("anan", m.group(1));
995        assertEquals("an", m.group(2));
996        assertEquals("as", m.group(3));
997        assertEquals("as", m.group(4));
998
999        // Test grouping without capture (?:...)
1000        p = Pattern.compile("(?:(?:an)+)(as)");
1001        m = p.matcher("ananas");
1002        assertTrue(m.matches());
1003        assertEquals(1, m.groupCount());
1004        assertEquals("as", m.group(1));
1005        try {
1006            m.group(2);
1007            fail("expected IndexOutOfBoundsException");
1008        } catch (IndexOutOfBoundsException ioobe) {
1009            // expected
1010        }
1011
1012        // Test combination of grouping and capture
1013        // TODO
1014
1015        // Test \<num> sequence with capturing and non-capturing groups
1016        // TODO
1017
1018        // Test \<num> with <num> out of range
1019        p = Pattern.compile("((an)+)as\\1");
1020        m = p.matcher("ananasanan");
1021        assertTrue(m.matches());
1022
1023        try {
1024            p = Pattern.compile("((an)+)as\\4");
1025            fail("expected PatternSyntaxException");
1026        } catch (PatternSyntaxException pse) {
1027            // expected
1028        }
1029
1030    }
1031    public void testRepeats() {
1032        Pattern p;
1033        Matcher m;
1034
1035        // Test ?
1036        p = Pattern.compile("(abc)?c");
1037        m = p.matcher("abcc");
1038        assertTrue(m.matches());
1039        m = p.matcher("c");
1040        assertTrue(m.matches());
1041        m = p.matcher("cc");
1042        assertFalse(m.matches());
1043        m = p.matcher("abcabcc");
1044        assertFalse(m.matches());
1045
1046        // Test *
1047        p = Pattern.compile("(abc)*c");
1048        m = p.matcher("abcc");
1049        assertTrue(m.matches());
1050        m = p.matcher("c");
1051        assertTrue(m.matches());
1052        m = p.matcher("cc");
1053        assertFalse(m.matches());
1054        m = p.matcher("abcabcc");
1055        assertTrue(m.matches());
1056
1057        // Test +
1058        p = Pattern.compile("(abc)+c");
1059        m = p.matcher("abcc");
1060        assertTrue(m.matches());
1061        m = p.matcher("c");
1062        assertFalse(m.matches());
1063        m = p.matcher("cc");
1064        assertFalse(m.matches());
1065        m = p.matcher("abcabcc");
1066        assertTrue(m.matches());
1067
1068        // Test {<num>}, including 0, 1 and more
1069        p = Pattern.compile("(abc){0}c");
1070        m = p.matcher("abcc");
1071        assertFalse(m.matches());
1072        m = p.matcher("c");
1073        assertTrue(m.matches());
1074
1075        p = Pattern.compile("(abc){1}c");
1076        m = p.matcher("abcc");
1077        assertTrue(m.matches());
1078        m = p.matcher("c");
1079        assertFalse(m.matches());
1080        m = p.matcher("abcabcc");
1081        assertFalse(m.matches());
1082
1083        p = Pattern.compile("(abc){2}c");
1084        m = p.matcher("abcc");
1085        assertFalse(m.matches());
1086        m = p.matcher("c");
1087        assertFalse(m.matches());
1088        m = p.matcher("cc");
1089        assertFalse(m.matches());
1090        m = p.matcher("abcabcc");
1091        assertTrue(m.matches());
1092
1093        // Test {<num>,}, including 0, 1 and more
1094        // TODO
1095
1096        // Test {<n1>,<n2>}, with n1 < n2, n1 = n2 and n1 > n2 (illegal?)
1097        // TODO
1098    }
1099    public void testAnchors() throws PatternSyntaxException {
1100        Pattern p;
1101        Matcher m;
1102
1103        // Test ^, default and MULTILINE
1104        p = Pattern.compile("^abc\\n^abc", Pattern.MULTILINE);
1105        m = p.matcher("abc\nabc");
1106        assertTrue(m.matches());
1107
1108        p = Pattern.compile("^abc\\n^abc");
1109        m = p.matcher("abc\nabc");
1110        assertFalse(m.matches());
1111
1112        // Test $, default and MULTILINE
1113        // TODO
1114
1115        // Test \b (word boundary)
1116        // TODO
1117
1118        // Test \B (not a word boundary)
1119        // TODO
1120
1121        // Test \A (beginning of string)
1122        // TODO
1123
1124        // Test \Z (end of string)
1125        // TODO
1126
1127        // Test \z (end of string)
1128        // TODO
1129
1130        // Test \G
1131        // TODO
1132
1133        // Test positive lookahead using (?=...)
1134        // TODO
1135
1136        // Test negative lookahead using (?!...)
1137        // TODO
1138
1139        // Test positive lookbehind using (?<=...)
1140        // TODO
1141
1142        // Test negative lookbehind using (?<!...)
1143        // TODO
1144    }
1145    public void testMisc() throws PatternSyntaxException {
1146        Pattern p;
1147        Matcher m;
1148
1149        // Test (?>...)
1150        // TODO
1151
1152        // Test (?onflags-offflags)
1153        // Valid flags are i,m,d,s,u,x
1154        // TODO
1155
1156        // Test (?onflags-offflags:...)
1157        // TODO
1158
1159        // Test \Q, \E
1160        p = Pattern.compile("[a-z]+;\\Q[a-z]+;\\Q(foo.*);\\E[0-9]+");
1161        m = p.matcher("abc;[a-z]+;\\Q(foo.*);411");
1162        assertTrue(m.matches());
1163        m = p.matcher("abc;def;foo42;555");
1164        assertFalse(m.matches());
1165        m = p.matcher("abc;\\Qdef;\\Qfoo99;\\E123");
1166        assertFalse(m.matches());
1167
1168        p = Pattern.compile("[a-z]+;(foo[0-9]-\\Q(...)\\E);[0-9]+");
1169        m = p.matcher("abc;foo5-(...);123");
1170        assertTrue(m.matches());
1171        assertEquals("foo5-(...)", m.group(1));
1172        m = p.matcher("abc;foo9-(xxx);789");
1173        assertFalse(m.matches());
1174
1175        p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q$-\\E]+);[0-9]+");
1176        m = p.matcher("abc;bar0-def$-;123");
1177        assertTrue(m.matches());
1178
1179        // FIXME:
1180        // This should work the same as the pattern above but fails with the
1181        // the reference JDK
1182        p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q-$\\E]+);[0-9]+");
1183        m = p.matcher("abc;bar0-def$-;123");
1184        // assertTrue(m.matches());
1185
1186        // FIXME:
1187        // This should work too .. it looks as if just about anything that
1188        // has more
1189        // than one character between \Q and \E is broken in the the reference JDK
1190        p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q[0-9]\\E]+);[0-9]+");
1191        m = p.matcher("abc;bar0-def[99]-]0x[;123");
1192        // assertTrue(m.matches());
1193
1194        // This is the same as above but with explicit escapes .. and this
1195        // does work
1196        // on the the reference JDK
1197        p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\[0\\-9\\]]+);[0-9]+");
1198        m = p.matcher("abc;bar0-def[99]-]0x[;123");
1199        assertTrue(m.matches());
1200
1201        // Test #<comment text>
1202        // TODO
1203    }
1204    public void testCompile1() throws PatternSyntaxException {
1205        Pattern pattern = Pattern
1206                .compile("[0-9A-Za-z][0-9A-Za-z\\x2e\\x3a\\x2d\\x5f]*");
1207        String name = "iso-8859-1";
1208        assertTrue(pattern.matcher(name).matches());
1209    }
1210    public void testCompile2() throws PatternSyntaxException {
1211        String findString = "\\Qimport\\E";
1212
1213        Pattern pattern = Pattern.compile(findString, 0);
1214        Matcher matcher = pattern.matcher(new String(
1215                "import a.A;\n\n import b.B;\nclass C {}"));
1216
1217        assertTrue(matcher.find(0));
1218    }
1219    public void testCompile3() throws PatternSyntaxException {
1220        Pattern p;
1221        Matcher m;
1222        p = Pattern.compile("a$");
1223        m = p.matcher("a\n");
1224        assertTrue(m.find());
1225        assertEquals("a", m.group());
1226        assertFalse(m.find());
1227
1228        p = Pattern.compile("(a$)");
1229        m = p.matcher("a\n");
1230        assertTrue(m.find());
1231        assertEquals("a", m.group());
1232        assertEquals("a", m.group(1));
1233        assertFalse(m.find());
1234
1235        p = Pattern.compile("^.*$", Pattern.MULTILINE);
1236
1237        m = p.matcher("a\n");
1238        assertTrue(m.find());
1239        // System.out.println("["+m.group()+"]");
1240        assertEquals("a", m.group());
1241        assertFalse(m.find());
1242
1243        m = p.matcher("a\nb\n");
1244        assertTrue(m.find());
1245        // System.out.println("["+m.group()+"]");
1246        assertEquals("a", m.group());
1247        assertTrue(m.find());
1248        // System.out.println("["+m.group()+"]");
1249        assertEquals("b", m.group());
1250        assertFalse(m.find());
1251
1252        m = p.matcher("a\nb");
1253        assertTrue(m.find());
1254        // System.out.println("["+m.group()+"]");
1255        assertEquals("a", m.group());
1256        assertTrue(m.find());
1257        assertEquals("b", m.group());
1258        assertFalse(m.find());
1259
1260        m = p.matcher("\naa\r\nbb\rcc\n\n");
1261        assertTrue(m.find());
1262        // System.out.println("["+m.group()+"]");
1263        assertTrue(m.group().equals(""));
1264        assertTrue(m.find());
1265        // System.out.println("["+m.group()+"]");
1266        assertEquals("aa", m.group());
1267        assertTrue(m.find());
1268        // System.out.println("["+m.group()+"]");
1269        assertEquals("bb", m.group());
1270        assertTrue(m.find());
1271        // System.out.println("["+m.group()+"]");
1272        assertEquals("cc", m.group());
1273        assertTrue(m.find());
1274        // System.out.println("["+m.group()+"]");
1275        assertTrue(m.group().equals(""));
1276        assertFalse(m.find());
1277
1278        m = p.matcher("a");
1279        assertTrue(m.find());
1280        assertEquals("a", m.group());
1281        assertFalse(m.find());
1282
1283// BEGIN android-removed
1284// Makes no sense to duplicate this weird behavior
1285//        m = p.matcher("");
1286//        // FIXME: This matches the reference behaviour but is
1287//        // inconsistent with matching "a" - ie. the end of the
1288//        // target string should match against $ always but this
1289//        // appears to work with the null string only when not in
1290//        // multiline mode (see below)
1291//        assertFalse(m.find());
1292// END android-removed
1293
1294        p = Pattern.compile("^.*$");
1295        m = p.matcher("");
1296        assertTrue(m.find());
1297        assertTrue(m.group().equals(""));
1298        assertFalse(m.find());
1299    }
1300    public void testCompile4() throws PatternSyntaxException {
1301        String findString = "\\Qpublic\\E";
1302        StringBuffer text = new StringBuffer("    public class Class {\n"
1303                + "    public class Class {");
1304
1305        Pattern pattern = Pattern.compile(findString, 0);
1306        Matcher matcher = pattern.matcher(text);
1307
1308        boolean found = matcher.find();
1309        assertTrue(found);
1310        assertEquals(4, matcher.start());
1311        if (found) {
1312            // modify text
1313            text.delete(0, text.length());
1314            text.append("Text have been changed.");
1315            matcher.reset(text);
1316        }
1317
1318        found = matcher.find();
1319        assertFalse(found);
1320    }
1321    public void testCompile5() throws PatternSyntaxException {
1322        Pattern p = Pattern.compile("^[0-9]");
1323        String s[] = p.split("12", -1);
1324        assertEquals("", s[0]);
1325        assertEquals("2", s[1]);
1326        assertEquals(2, s.length);
1327    }
1328
1329    //      public void testCompile6() {
1330    //        String regex = "[\\p{L}[\\p{Mn}[\\p{Pc}[\\p{Nd}[\\p{Nl}[\\p{Sc}]]]]]]+";
1331    //        String regex = "[\\p{L}\\p{Mn}\\p{Pc}\\p{Nd}\\p{Nl}\\p{Sc}]+";
1332    //        try {
1333    //            Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
1334    //            assertTrue(true);
1335    //        } catch (PatternSyntaxException e) {
1336    //            System.out.println(e.getMessage());
1337    //            assertTrue(false);
1338    //        }
1339    //    }
1340
1341    private static class UBInfo {
1342        public UBInfo(int low, int high, String name) {
1343            this.name = name;
1344            this.low = low;
1345            this.high = high;
1346        }
1347
1348        public String name;
1349
1350        public int low, high;
1351    }
1352
1353    // A table representing the unicode categories
1354    //private static UBInfo[] UCategories = {
1355    // Lu
1356    // Ll
1357    // Lt
1358    // Lm
1359    // Lo
1360    // Mn
1361    // Mc
1362    // Me
1363    // Nd
1364    // Nl
1365    // No
1366    // Pc
1367    // Pd
1368    // Ps
1369    // Pe
1370    // Pi
1371    // Pf
1372    // Po
1373    // Sm
1374    // Sc
1375    // Sk
1376    // So
1377    // Zs
1378    // Zl
1379    // Zp
1380    // Cc
1381    // Cf
1382    // Cs
1383    // Co
1384    // Cn
1385    //};
1386
    /**
     * A table representing the unicode character blocks.
     *
     * Each entry gives the first and last code unit of a block (both ends
     * inclusive, as testUnicodeBlocks iterates low..high and probes low-1 and
     * high+1 as outsiders) and the block name WITHOUT the "In" prefix;
     * testUnicodeBlocks prepends "In" itself when it builds the pattern.
     *
     * The range comment above each entry is "low; high; official name".
     * Entries that are commented out are not exercised. Note that the
     * commented-out entries still carry an "In" prefix inside the name, so
     * they would need that prefix removed before being re-enabled.
     */
    private static UBInfo[] UBlocks = {
    /* 0000; 007F; Basic Latin */
    new UBInfo(0x0000, 0x007F, "BasicLatin"), // Character.UnicodeBlock.BASIC_LATIN
            /* 0080; 00FF; Latin-1 Supplement */
            new UBInfo(0x0080, 0x00FF, "Latin-1Supplement"), // Character.UnicodeBlock.LATIN_1_SUPPLEMENT
            /* 0100; 017F; Latin Extended-A */
            new UBInfo(0x0100, 0x017F, "LatinExtended-A"), // Character.UnicodeBlock.LATIN_EXTENDED_A
            /* 0180; 024F; Latin Extended-B */
            // new UBInfo (0x0180,0x024F,"InLatinExtended-B"), //
            // Character.UnicodeBlock.LATIN_EXTENDED_B
            /* 0250; 02AF; IPA Extensions */
            new UBInfo(0x0250, 0x02AF, "IPAExtensions"), // Character.UnicodeBlock.IPA_EXTENSIONS
            /* 02B0; 02FF; Spacing Modifier Letters */
            new UBInfo(0x02B0, 0x02FF, "SpacingModifierLetters"), // Character.UnicodeBlock.SPACING_MODIFIER_LETTERS
            /* 0300; 036F; Combining Diacritical Marks */
            new UBInfo(0x0300, 0x036F, "CombiningDiacriticalMarks"), // Character.UnicodeBlock.COMBINING_DIACRITICAL_MARKS
            /* 0370; 03FF; Greek */
            new UBInfo(0x0370, 0x03FF, "Greek"), // Character.UnicodeBlock.GREEK
            /* 0400; 04FF; Cyrillic */
            new UBInfo(0x0400, 0x04FF, "Cyrillic"), // Character.UnicodeBlock.CYRILLIC
            /* 0530; 058F; Armenian */
            new UBInfo(0x0530, 0x058F, "Armenian"), // Character.UnicodeBlock.ARMENIAN
            /* 0590; 05FF; Hebrew */
            new UBInfo(0x0590, 0x05FF, "Hebrew"), // Character.UnicodeBlock.HEBREW
            /* 0600; 06FF; Arabic */
            new UBInfo(0x0600, 0x06FF, "Arabic"), // Character.UnicodeBlock.ARABIC
            /* 0700; 074F; Syriac */
            new UBInfo(0x0700, 0x074F, "Syriac"), // Character.UnicodeBlock.SYRIAC
            /* 0780; 07BF; Thaana */
            new UBInfo(0x0780, 0x07BF, "Thaana"), // Character.UnicodeBlock.THAANA
            /* 0900; 097F; Devanagari */
            new UBInfo(0x0900, 0x097F, "Devanagari"), // Character.UnicodeBlock.DEVANAGARI
            /* 0980; 09FF; Bengali */
            new UBInfo(0x0980, 0x09FF, "Bengali"), // Character.UnicodeBlock.BENGALI
            /* 0A00; 0A7F; Gurmukhi */
            new UBInfo(0x0A00, 0x0A7F, "Gurmukhi"), // Character.UnicodeBlock.GURMUKHI
            /* 0A80; 0AFF; Gujarati */
            new UBInfo(0x0A80, 0x0AFF, "Gujarati"), // Character.UnicodeBlock.GUJARATI
            /* 0B00; 0B7F; Oriya */
            new UBInfo(0x0B00, 0x0B7F, "Oriya"), // Character.UnicodeBlock.ORIYA
            /* 0B80; 0BFF; Tamil */
            new UBInfo(0x0B80, 0x0BFF, "Tamil"), // Character.UnicodeBlock.TAMIL
            /* 0C00; 0C7F; Telugu */
            new UBInfo(0x0C00, 0x0C7F, "Telugu"), // Character.UnicodeBlock.TELUGU
            /* 0C80; 0CFF; Kannada */
            new UBInfo(0x0C80, 0x0CFF, "Kannada"), // Character.UnicodeBlock.KANNADA
            /* 0D00; 0D7F; Malayalam */
            new UBInfo(0x0D00, 0x0D7F, "Malayalam"), // Character.UnicodeBlock.MALAYALAM
            /* 0D80; 0DFF; Sinhala */
            new UBInfo(0x0D80, 0x0DFF, "Sinhala"), // Character.UnicodeBlock.SINHALA
            /* 0E00; 0E7F; Thai */
            new UBInfo(0x0E00, 0x0E7F, "Thai"), // Character.UnicodeBlock.THAI
            /* 0E80; 0EFF; Lao */
            new UBInfo(0x0E80, 0x0EFF, "Lao"), // Character.UnicodeBlock.LAO
            /* 0F00; 0FFF; Tibetan */
            new UBInfo(0x0F00, 0x0FFF, "Tibetan"), // Character.UnicodeBlock.TIBETAN
            /* 1000; 109F; Myanmar */
            new UBInfo(0x1000, 0x109F, "Myanmar"), // Character.UnicodeBlock.MYANMAR
            /* 10A0; 10FF; Georgian */
            new UBInfo(0x10A0, 0x10FF, "Georgian"), // Character.UnicodeBlock.GEORGIAN
            /* 1100; 11FF; Hangul Jamo */
            new UBInfo(0x1100, 0x11FF, "HangulJamo"), // Character.UnicodeBlock.HANGUL_JAMO
            /* 1200; 137F; Ethiopic */
            new UBInfo(0x1200, 0x137F, "Ethiopic"), // Character.UnicodeBlock.ETHIOPIC
            /* 13A0; 13FF; Cherokee */
            new UBInfo(0x13A0, 0x13FF, "Cherokee"), // Character.UnicodeBlock.CHEROKEE
            /* 1400; 167F; Unified Canadian Aboriginal Syllabics */
            new UBInfo(0x1400, 0x167F, "UnifiedCanadianAboriginalSyllabics"), // Character.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
            /* 1680; 169F; Ogham */
            new UBInfo(0x1680, 0x169F, "Ogham"), // Character.UnicodeBlock.OGHAM
            /* 16A0; 16FF; Runic */
            new UBInfo(0x16A0, 0x16FF, "Runic"), // Character.UnicodeBlock.RUNIC
            /* 1780; 17FF; Khmer */
            new UBInfo(0x1780, 0x17FF, "Khmer"), // Character.UnicodeBlock.KHMER
            /* 1800; 18AF; Mongolian */
            new UBInfo(0x1800, 0x18AF, "Mongolian"), // Character.UnicodeBlock.MONGOLIAN
            /* 1E00; 1EFF; Latin Extended Additional */
            new UBInfo(0x1E00, 0x1EFF, "LatinExtendedAdditional"), // Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
            /* 1F00; 1FFF; Greek Extended */
            new UBInfo(0x1F00, 0x1FFF, "GreekExtended"), // Character.UnicodeBlock.GREEK_EXTENDED
            /* 2000; 206F; General Punctuation */
            new UBInfo(0x2000, 0x206F, "GeneralPunctuation"), // Character.UnicodeBlock.GENERAL_PUNCTUATION
            /* 2070; 209F; Superscripts and Subscripts */
            new UBInfo(0x2070, 0x209F, "SuperscriptsandSubscripts"), // Character.UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS
            /* 20A0; 20CF; Currency Symbols */
            new UBInfo(0x20A0, 0x20CF, "CurrencySymbols"), // Character.UnicodeBlock.CURRENCY_SYMBOLS
            /* 20D0; 20FF; Combining Marks for Symbols */
            new UBInfo(0x20D0, 0x20FF, "CombiningMarksforSymbols"), // Character.UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS
            /* 2100; 214F; Letterlike Symbols */
            new UBInfo(0x2100, 0x214F, "LetterlikeSymbols"), // Character.UnicodeBlock.LETTERLIKE_SYMBOLS
            /* 2150; 218F; Number Forms */
            new UBInfo(0x2150, 0x218F, "NumberForms"), // Character.UnicodeBlock.NUMBER_FORMS
            /* 2190; 21FF; Arrows */
            new UBInfo(0x2190, 0x21FF, "Arrows"), // Character.UnicodeBlock.ARROWS
            /* 2200; 22FF; Mathematical Operators */
            new UBInfo(0x2200, 0x22FF, "MathematicalOperators"), // Character.UnicodeBlock.MATHEMATICAL_OPERATORS
            /* 2300; 23FF; Miscellaneous Technical */
            new UBInfo(0x2300, 0x23FF, "MiscellaneousTechnical"), // Character.UnicodeBlock.MISCELLANEOUS_TECHNICAL
            /* 2400; 243F; Control Pictures */
            new UBInfo(0x2400, 0x243F, "ControlPictures"), // Character.UnicodeBlock.CONTROL_PICTURES
            /* 2440; 245F; Optical Character Recognition */
            new UBInfo(0x2440, 0x245F, "OpticalCharacterRecognition"), // Character.UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION
            /* 2460; 24FF; Enclosed Alphanumerics */
            new UBInfo(0x2460, 0x24FF, "EnclosedAlphanumerics"), // Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS
            /* 2500; 257F; Box Drawing */
            new UBInfo(0x2500, 0x257F, "BoxDrawing"), // Character.UnicodeBlock.BOX_DRAWING
            /* 2580; 259F; Block Elements */
            new UBInfo(0x2580, 0x259F, "BlockElements"), // Character.UnicodeBlock.BLOCK_ELEMENTS
            /* 25A0; 25FF; Geometric Shapes */
            new UBInfo(0x25A0, 0x25FF, "GeometricShapes"), // Character.UnicodeBlock.GEOMETRIC_SHAPES
            /* 2600; 26FF; Miscellaneous Symbols */
            new UBInfo(0x2600, 0x26FF, "MiscellaneousSymbols"), // Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS
            /* 2700; 27BF; Dingbats */
            new UBInfo(0x2700, 0x27BF, "Dingbats"), // Character.UnicodeBlock.DINGBATS
            /* 2800; 28FF; Braille Patterns */
            new UBInfo(0x2800, 0x28FF, "BraillePatterns"), // Character.UnicodeBlock.BRAILLE_PATTERNS
            /* 2E80; 2EFF; CJK Radicals Supplement */
            new UBInfo(0x2E80, 0x2EFF, "CJKRadicalsSupplement"), // Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT
            /* 2F00; 2FDF; Kangxi Radicals */
            new UBInfo(0x2F00, 0x2FDF, "KangxiRadicals"), // Character.UnicodeBlock.KANGXI_RADICALS
            /* 2FF0; 2FFF; Ideographic Description Characters */
            new UBInfo(0x2FF0, 0x2FFF, "IdeographicDescriptionCharacters"), // Character.UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS
            /* 3000; 303F; CJK Symbols and Punctuation */
            new UBInfo(0x3000, 0x303F, "CJKSymbolsandPunctuation"), // Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
            /* 3040; 309F; Hiragana */
            new UBInfo(0x3040, 0x309F, "Hiragana"), // Character.UnicodeBlock.HIRAGANA
            /* 30A0; 30FF; Katakana */
            new UBInfo(0x30A0, 0x30FF, "Katakana"), // Character.UnicodeBlock.KATAKANA
            /* 3100; 312F; Bopomofo */
            new UBInfo(0x3100, 0x312F, "Bopomofo"), // Character.UnicodeBlock.BOPOMOFO
            /* 3130; 318F; Hangul Compatibility Jamo */
            new UBInfo(0x3130, 0x318F, "HangulCompatibilityJamo"), // Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
            /* 3190; 319F; Kanbun */
            new UBInfo(0x3190, 0x319F, "Kanbun"), // Character.UnicodeBlock.KANBUN
            /* 31A0; 31BF; Bopomofo Extended */
            new UBInfo(0x31A0, 0x31BF, "BopomofoExtended"), // Character.UnicodeBlock.BOPOMOFO_EXTENDED
            /* 3200; 32FF; Enclosed CJK Letters and Months */
            new UBInfo(0x3200, 0x32FF, "EnclosedCJKLettersandMonths"), // Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS
            /* 3300; 33FF; CJK Compatibility */
            new UBInfo(0x3300, 0x33FF, "CJKCompatibility"), // Character.UnicodeBlock.CJK_COMPATIBILITY
            /* 3400; 4DBF; CJK Unified Ideographs Extension A (this comment previously said 4DB5) */
// BEGIN android-changed
// Modified this to reflect current Unicode tables (or maybe it was a typo)
            new UBInfo(0x3400, 0x4DBF, "CJKUnifiedIdeographsExtensionA"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
// END android-changed
            /* 4E00; 9FFF; CJK Unified Ideographs */
            new UBInfo(0x4E00, 0x9FFF, "CJKUnifiedIdeographs"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            /* A000; A48F; Yi Syllables */
            new UBInfo(0xA000, 0xA48F, "YiSyllables"), // Character.UnicodeBlock.YI_SYLLABLES
            /* A490; A4CF; Yi Radicals */
            new UBInfo(0xA490, 0xA4CF, "YiRadicals"), // Character.UnicodeBlock.YI_RADICALS
            /* AC00; D7AF; Hangul Syllables (this comment previously said D7A3) */
// BEGIN android-changed
// Modified this to reflect current Unicode tables (or maybe it was a typo)
            new UBInfo(0xAC00, 0xD7AF, "HangulSyllables"), // Character.UnicodeBlock.HANGUL_SYLLABLES
// END android-changed
            /* D800; DB7F; High Surrogates */
            /* DB80; DBFF; High Private Use Surrogates */
            /* DC00; DFFF; Low Surrogates */
            /* E000; F8FF; Private Use */
            /* F900; FAFF; CJK Compatibility Ideographs */
            new UBInfo(0xF900, 0xFAFF, "CJKCompatibilityIdeographs"), // Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            /* FB00; FB4F; Alphabetic Presentation Forms */
            new UBInfo(0xFB00, 0xFB4F, "AlphabeticPresentationForms"), // Character.UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS
            /* FB50; FDFF; Arabic Presentation Forms-A */
            new UBInfo(0xFB50, 0xFDFF, "ArabicPresentationForms-A"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_A
            /* FE20; FE2F; Combining Half Marks */
            new UBInfo(0xFE20, 0xFE2F, "CombiningHalfMarks"), // Character.UnicodeBlock.COMBINING_HALF_MARKS
            /* FE30; FE4F; CJK Compatibility Forms */
            new UBInfo(0xFE30, 0xFE4F, "CJKCompatibilityForms"), // Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
            /* FE50; FE6F; Small Form Variants */
            new UBInfo(0xFE50, 0xFE6F, "SmallFormVariants"), // Character.UnicodeBlock.SMALL_FORM_VARIANTS
            /* FE70; FEFE; Arabic Presentation Forms-B */
            // new UBInfo (0xFE70,0xFEFE,"InArabicPresentationForms-B"), //
            // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B
            /* FEFF; FEFF; Specials */
// BEGIN android-changed
// Modified this to reflect current Unicode tables (or maybe it was a typo)
// FEFF is actually still Arabic Presentation Forms B
//            new UBInfo(0xFEFF, 0xFEFF, "Specials"), // Character.UnicodeBlock.SPECIALS
// END android-changed
            /* FF00; FFEF; Halfwidth and Fullwidth Forms */
            new UBInfo(0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"), // Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
            /* FFF0; FFFF; Specials (this comment previously said FFFD) */
// BEGIN android-changed
// Modified this to reflect current Unicode tables (or maybe it was a typo)
            new UBInfo(0xFFF0, 0xFFFF, "Specials") // Character.UnicodeBlock.SPECIALS
// END android-changed
    };
1577}
1578