1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.apache.harmony.tests.java.util.regex;
18
19import java.util.regex.Matcher;
20import java.util.regex.Pattern;
21import java.util.regex.PatternSyntaxException;
22
23import junit.framework.TestCase;
24
25/**
26 * Tests simple Pattern compilation and Matcher methods
27 */
28@SuppressWarnings("nls")
29public class Pattern2Test extends TestCase {
30    public void testSimpleMatch() throws PatternSyntaxException {
31        Pattern p = Pattern.compile("foo.*");
32
33        Matcher m1 = p.matcher("foo123");
34        assertTrue(m1.matches());
35        assertTrue(m1.find(0));
36        assertTrue(m1.lookingAt());
37
38        Matcher m2 = p.matcher("fox");
39        assertFalse(m2.matches());
40        assertFalse(m2.find(0));
41        assertFalse(m2.lookingAt());
42
43        assertTrue(Pattern.matches("foo.*", "foo123"));
44        assertFalse(Pattern.matches("foo.*", "fox"));
45
46        assertFalse(Pattern.matches("bar", "foobar"));
47
48        assertTrue(Pattern.matches("", ""));
49    }
50
51    public void testCursors() {
52        Pattern p;
53        Matcher m;
54
55        try {
56            p = Pattern.compile("foo");
57
58            m = p.matcher("foobar");
59            assertTrue(m.find());
60            assertEquals(0, m.start());
61            assertEquals(3, m.end());
62            assertFalse(m.find());
63
64            // Note: also testing reset here
65            m.reset();
66            assertTrue(m.find());
67            assertEquals(0, m.start());
68            assertEquals(3, m.end());
69            assertFalse(m.find());
70
71            m.reset("barfoobar");
72            assertTrue(m.find());
73            assertEquals(3, m.start());
74            assertEquals(6, m.end());
75            assertFalse(m.find());
76
77            m.reset("barfoo");
78            assertTrue(m.find());
79            assertEquals(3, m.start());
80            assertEquals(6, m.end());
81            assertFalse(m.find());
82
83            m.reset("foobarfoobarfoo");
84            assertTrue(m.find());
85            assertEquals(0, m.start());
86            assertEquals(3, m.end());
87            assertTrue(m.find());
88            assertEquals(6, m.start());
89            assertEquals(9, m.end());
90            assertTrue(m.find());
91            assertEquals(12, m.start());
92            assertEquals(15, m.end());
93            assertFalse(m.find());
94            assertTrue(m.find(0));
95            assertEquals(0, m.start());
96            assertEquals(3, m.end());
97            assertTrue(m.find(4));
98            assertEquals(6, m.start());
99            assertEquals(9, m.end());
100        } catch (PatternSyntaxException e) {
101            System.out.println(e.getMessage());
102            fail();
103        }
104    }
105
106    public void testGroups() throws PatternSyntaxException {
107        Pattern p;
108        Matcher m;
109
110        p = Pattern.compile("(p[0-9]*)#?(q[0-9]*)");
111
112        m = p.matcher("p1#q3p2q42p5p71p63#q888");
113        assertTrue(m.find());
114        assertEquals(0, m.start());
115        assertEquals(5, m.end());
116        assertEquals(2, m.groupCount());
117        assertEquals(0, m.start(0));
118        assertEquals(5, m.end(0));
119        assertEquals(0, m.start(1));
120        assertEquals(2, m.end(1));
121        assertEquals(3, m.start(2));
122        assertEquals(5, m.end(2));
123        assertEquals("p1#q3", m.group());
124        assertEquals("p1#q3", m.group(0));
125        assertEquals("p1", m.group(1));
126        assertEquals("q3", m.group(2));
127
128        assertTrue(m.find());
129        assertEquals(5, m.start());
130        assertEquals(10, m.end());
131        assertEquals(2, m.groupCount());
132        assertEquals(10, m.end(0));
133        assertEquals(5, m.start(1));
134        assertEquals(7, m.end(1));
135        assertEquals(7, m.start(2));
136        assertEquals(10, m.end(2));
137        assertEquals("p2q42", m.group());
138        assertEquals("p2q42", m.group(0));
139        assertEquals("p2", m.group(1));
140        assertEquals("q42", m.group(2));
141
142        assertTrue(m.find());
143        assertEquals(15, m.start());
144        assertEquals(23, m.end());
145        assertEquals(2, m.groupCount());
146        assertEquals(15, m.start(0));
147        assertEquals(23, m.end(0));
148        assertEquals(15, m.start(1));
149        assertEquals(18, m.end(1));
150        assertEquals(19, m.start(2));
151        assertEquals(23, m.end(2));
152        assertEquals("p63#q888", m.group());
153        assertEquals("p63#q888", m.group(0));
154        assertEquals("p63", m.group(1));
155        assertEquals("q888", m.group(2));
156        assertFalse(m.find());
157    }
158
159    public void testReplace() throws PatternSyntaxException {
160        Pattern p;
161        Matcher m;
162
163        // Note: examples from book,
164        // Hitchens, Ron, 2002, "Java NIO", O'Reilly, page 171
165        p = Pattern.compile("a*b");
166
167        m = p.matcher("aabfooaabfooabfoob");
168        assertTrue(m.replaceAll("-").equals("-foo-foo-foo-"));
169        assertTrue(m.replaceFirst("-").equals("-fooaabfooabfoob"));
170
171        /*
172         * p = Pattern.compile ("\\p{Blank}");
173         *
174         * m = p.matcher ("fee fie foe fum"); assertTrue
175         * (m.replaceFirst("-").equals ("fee-fie foe fum")); assertTrue
176         * (m.replaceAll("-").equals ("fee-fie-foe-fum"));
177         */
178
179        p = Pattern.compile("([bB])yte");
180
181        m = p.matcher("Byte for byte");
182        assertTrue(m.replaceFirst("$1ite").equals("Bite for byte"));
183        assertTrue(m.replaceAll("$1ite").equals("Bite for bite"));
184
185        p = Pattern.compile("\\d\\d\\d\\d([- ])");
186
187        m = p.matcher("card #1234-5678-1234");
188        assertTrue(m.replaceFirst("xxxx$1").equals("card #xxxx-5678-1234"));
189        assertTrue(m.replaceAll("xxxx$1").equals("card #xxxx-xxxx-1234"));
190
191        p = Pattern.compile("(up|left)( *)(right|down)");
192
193        m = p.matcher("left right, up down");
194        assertTrue(m.replaceFirst("$3$2$1").equals("right left, up down"));
195        assertTrue(m.replaceAll("$3$2$1").equals("right left, down up"));
196
197        p = Pattern.compile("([CcPp][hl]e[ea]se)");
198
199        m = p.matcher("I want cheese. Please.");
200        assertTrue(m.replaceFirst("<b> $1 </b>").equals(
201                "I want <b> cheese </b>. Please."));
202        assertTrue(m.replaceAll("<b> $1 </b>").equals(
203                "I want <b> cheese </b>. <b> Please </b>."));
204    }
205
206    public void testEscapes() throws PatternSyntaxException {
207        Pattern p;
208        Matcher m;
209
210        // Test \\ sequence
211        p = Pattern.compile("([a-z]+)\\\\([a-z]+);");
212        m = p.matcher("fred\\ginger;abbott\\costello;jekell\\hyde;");
213        assertTrue(m.find());
214        assertEquals("fred", m.group(1));
215        assertEquals("ginger", m.group(2));
216        assertTrue(m.find());
217        assertEquals("abbott", m.group(1));
218        assertEquals("costello", m.group(2));
219        assertTrue(m.find());
220        assertEquals("jekell", m.group(1));
221        assertEquals("hyde", m.group(2));
222        assertFalse(m.find());
223
224        // Test \n, \t, \r, \f, \e, \a sequences
225        p = Pattern.compile("([a-z]+)[\\n\\t\\r\\f\\e\\a]+([a-z]+)");
226        m = p.matcher("aa\nbb;cc\u0009\rdd;ee\u000C\u001Bff;gg\n\u0007hh");
227        assertTrue(m.find());
228        assertEquals("aa", m.group(1));
229        assertEquals("bb", m.group(2));
230        assertTrue(m.find());
231        assertEquals("cc", m.group(1));
232        assertEquals("dd", m.group(2));
233        assertTrue(m.find());
234        assertEquals("ee", m.group(1));
235        assertEquals("ff", m.group(2));
236        assertTrue(m.find());
237        assertEquals("gg", m.group(1));
238        assertEquals("hh", m.group(2));
239        assertFalse(m.find());
240
241        // Test \\u and \\x sequences
242p = Pattern.compile("([0-9]+)[\\u0020:\\x21];");
243        m = p.matcher("11:;22 ;33-;44!;");
244        assertTrue(m.find());
245        assertEquals("11", m.group(1));
246        assertTrue(m.find());
247        assertEquals("22", m.group(1));
248        assertTrue(m.find());
249        assertEquals("44", m.group(1));
250        assertFalse(m.find());
251
252        // Test invalid unicode sequences
253        try {
254            p = Pattern.compile("\\u");
255            fail("PatternSyntaxException expected");
256        } catch (PatternSyntaxException e) {
257        }
258
259        try {
260            p = Pattern.compile("\\u;");
261            fail("PatternSyntaxException expected");
262        } catch (PatternSyntaxException e) {
263        }
264
265        try {
266            p = Pattern.compile("\\u002");
267            fail("PatternSyntaxException expected");
268        } catch (PatternSyntaxException e) {
269        }
270
271        try {
272            p = Pattern.compile("\\u002;");
273            fail("PatternSyntaxException expected");
274        } catch (PatternSyntaxException e) {
275        }
276
277        // Test invalid hex sequences
278        try {
279            p = Pattern.compile("\\x");
280            fail("PatternSyntaxException expected");
281        } catch (PatternSyntaxException e) {
282        }
283
284        try {
285            p = Pattern.compile("\\x;");
286            fail("PatternSyntaxException expected");
287        } catch (PatternSyntaxException e) {
288        }
289
290        // icu4c allows 1 to 6 hex digits in \x escapes.
291        p = Pattern.compile("\\xa");
292        p = Pattern.compile("\\xab");
293        p = Pattern.compile("\\xabc");
294        p = Pattern.compile("\\xabcd");
295        p = Pattern.compile("\\xabcde");
296        p = Pattern.compile("\\xabcdef");
297        // (Further digits would just be treated as characters after the escape.)
298        try {
299            p = Pattern.compile("\\xg");
300            fail();
301        } catch (PatternSyntaxException expected) {
302        }
303
304        // Test \0 (octal) sequences (1, 2 and 3 digit)
305        p = Pattern.compile("([0-9]+)[\\07\\040\\0160];");
306        m = p.matcher("11\u0007;22:;33 ;44p;");
307        assertTrue(m.find());
308        assertEquals("11", m.group(1));
309        assertTrue(m.find());
310        assertEquals("33", m.group(1));
311        assertTrue(m.find());
312        assertEquals("44", m.group(1));
313        assertFalse(m.find());
314
315        // Test invalid octal sequences
316        try {
317            p = Pattern.compile("\\08");
318            fail("PatternSyntaxException expected");
319        } catch (PatternSyntaxException e) {
320        }
321
322        // originally contributed test did not check the result
323        // TODO: check what RI does here
324        // try {
325        // p = Pattern.compile("\\0477");
326        // fail("PatternSyntaxException expected");
327        // } catch (PatternSyntaxException e) {
328        // }
329
330        try {
331            p = Pattern.compile("\\0");
332            fail("PatternSyntaxException expected");
333        } catch (PatternSyntaxException e) {
334        }
335
336        try {
337            p = Pattern.compile("\\0;");
338            fail("PatternSyntaxException expected");
339        } catch (PatternSyntaxException e) {
340        }
341
342        // Test \c (control character) sequence
343        p = Pattern.compile("([0-9]+)[\\cA\\cB\\cC\\cD];");
344        m = p.matcher("11\u0001;22:;33\u0002;44p;55\u0003;66\u0004;");
345        assertTrue(m.find());
346        assertEquals("11", m.group(1));
347        assertTrue(m.find());
348        assertEquals("33", m.group(1));
349        assertTrue(m.find());
350        assertEquals("55", m.group(1));
351        assertTrue(m.find());
352        assertEquals("66", m.group(1));
353        assertFalse(m.find());
354
355        // More thorough control escape test
356        // Ensure that each escape matches exactly the corresponding
357        // character
358        // code and no others (well, from 0-255 at least)
359        int i, j;
360        for (i = 0; i < 26; i++) {
361            p = Pattern.compile("\\c" + Character.toString((char) ('A' + i)));
362            int match_char = -1;
363            for (j = 0; j < 255; j++) {
364                m = p.matcher(Character.toString((char) j));
365                if (m.matches()) {
366                    assertEquals(-1, match_char);
367                    match_char = j;
368                }
369            }
370            assertTrue(match_char == i + 1);
371        }
372
373        // Test invalid control escapes
374        // icu4c 50 accepts this pattern, and treats it as a literal.
375        //try {
376            p = Pattern.compile("\\c");
377            assertTrue(p.matcher("x\\cy").find());
378        //    fail(p.matcher("").toString());
379        //} catch (PatternSyntaxException e) {
380        //}
381
382        // But \cH works.
383        p = Pattern.compile("\\cH");
384        assertTrue(p.matcher("x\u0008y").find());
385        assertFalse(p.matcher("x\\cHy").find());
386
387        // originally contributed test did not check the result
388        // TODO: check what RI does here
389        // try {
390        // p = Pattern.compile("\\c;");
391        // fail("PatternSyntaxException expected");
392        // } catch (PatternSyntaxException e) {
393        // }
394        //
395        // try {
396        // p = Pattern.compile("\\ca;");
397        // fail("PatternSyntaxException expected");
398        // } catch (PatternSyntaxException e) {
399        // }
400        //
401        // try {
402        // p = Pattern.compile("\\c4;");
403        // fail("PatternSyntaxException expected");
404        // } catch (PatternSyntaxException e) {
405        // }
406    }
407
408    public void testCharacterClasses() throws PatternSyntaxException {
409        Pattern p;
410        Matcher m;
411
412        // Test one character range
413        p = Pattern.compile("[p].*[l]");
414        m = p.matcher("paul");
415        assertTrue(m.matches());
416        m = p.matcher("pool");
417        assertTrue(m.matches());
418        m = p.matcher("pong");
419        assertFalse(m.matches());
420        m = p.matcher("pl");
421        assertTrue(m.matches());
422
423        // Test two character range
424        p = Pattern.compile("[pm].*[lp]");
425        m = p.matcher("prop");
426        assertTrue(m.matches());
427        m = p.matcher("mall");
428        assertTrue(m.matches());
429        m = p.matcher("pong");
430        assertFalse(m.matches());
431        m = p.matcher("pill");
432        assertTrue(m.matches());
433
434        // Test range including [ and ]
435        p = Pattern.compile("[<\\[].*[\\]>]");
436        m = p.matcher("<foo>");
437        assertTrue(m.matches());
438        m = p.matcher("[bar]");
439        assertTrue(m.matches());
440        m = p.matcher("{foobar]");
441        assertFalse(m.matches());
442        m = p.matcher("<pill]");
443        assertTrue(m.matches());
444
445        // Test range using ^
446        p = Pattern.compile("[^bc][a-z]+[tr]");
447        m = p.matcher("pat");
448        assertTrue(m.matches());
449        m = p.matcher("liar");
450        assertTrue(m.matches());
451        m = p.matcher("car");
452        assertFalse(m.matches());
453        m = p.matcher("gnat");
454        assertTrue(m.matches());
455
456        // Test character range using -
457        p = Pattern.compile("[a-z]_+[a-zA-Z]-+[0-9p-z]");
458        m = p.matcher("d__F-8");
459        assertTrue(m.matches());
460        m = p.matcher("c_a-q");
461        assertTrue(m.matches());
462        m = p.matcher("a__R-a");
463        assertFalse(m.matches());
464        m = p.matcher("r_____d-----5");
465        assertTrue(m.matches());
466
467        // Test range using unicode characters and unicode and hex escapes
468        p = Pattern.compile("[\\u1234-\\u2345]_+[a-z]-+[\u0001-\\x11]");
469        m = p.matcher("\u2000_q-\u0007");
470        assertTrue(m.matches());
471        m = p.matcher("\u1234_z-\u0001");
472        assertTrue(m.matches());
473        m = p.matcher("r_p-q");
474        assertFalse(m.matches());
475        m = p.matcher("\u2345_____d-----\n");
476        assertTrue(m.matches());
477
478        // Test ranges including the "-" character
479        // "---" collides with icu4c's "--" operator, and likely to be user error anyway.
480        if (false) {
481            p = Pattern.compile("[\\*-/]_+[---]!+[--AP]");
482            m = p.matcher("-_-!!A");
483            assertTrue(m.matches());
484            m = p.matcher("\u002b_-!!!-");
485            assertTrue(m.matches());
486            m = p.matcher("!_-!@");
487            assertFalse(m.matches());
488            m = p.matcher(",______-!!!!!!!P");
489            assertTrue(m.matches());
490        }
491
492        // Test nested ranges
493        p = Pattern.compile("[pm[t]][a-z]+[[r]lp]");
494        m = p.matcher("prop");
495        assertTrue(m.matches());
496        m = p.matcher("tsar");
497        assertTrue(m.matches());
498        m = p.matcher("pong");
499        assertFalse(m.matches());
500        m = p.matcher("moor");
501        assertTrue(m.matches());
502
503        // Test character class intersection with &&
504        // TODO: figure out what x&&y or any class with a null intersection
505        // set (like [[a-c]&&[d-f]]) might mean. It doesn't mean "match
506        // nothing" and doesn't mean "match anything" so I'm stumped.
507        p = Pattern.compile("[[a-p]&&[g-z]]+-+[[a-z]&&q]-+[x&&[a-z]]-+");
508        m = p.matcher("h--q--x--");
509        assertTrue(m.matches());
510        m = p.matcher("hog--q-x-");
511        assertTrue(m.matches());
512        m = p.matcher("ape--q-x-");
513        assertFalse(m.matches());
514        m = p.matcher("mop--q-x----");
515        assertTrue(m.matches());
516
517        // Test error cases with &&
518        // This is an RI bug that icu4c doesn't have.
519        if (false) {
520            p = Pattern.compile("[&&[xyz]]");
521            m = p.matcher("&");
522            // System.out.println(m.matches());
523            m = p.matcher("x");
524            // System.out.println(m.matches());
525            m = p.matcher("y");
526            // System.out.println(m.matches());
527        }
528        p = Pattern.compile("[[xyz]&[axy]]");
529        m = p.matcher("x");
530        // System.out.println(m.matches());
531        m = p.matcher("z");
532        // System.out.println(m.matches());
533        m = p.matcher("&");
534        // System.out.println(m.matches());
535        p = Pattern.compile("[abc[123]&&[345]def]");
536        m = p.matcher("a");
537        // System.out.println(m.matches());
538
539        // icu4c rightly considers a missing rhs to && a syntax error.
540        if (false) {
541            p = Pattern.compile("[[xyz]&&]");
542        }
543
544        p = Pattern.compile("[[abc]&]");
545
546        try {
547            p = Pattern.compile("[[abc]&&");
548            fail("PatternSyntaxException expected");
549        } catch (PatternSyntaxException e) {
550        }
551
552        p = Pattern.compile("[[abc]\\&&[xyz]]");
553
554        p = Pattern.compile("[[abc]&\\&[xyz]]");
555
556        // Test 3-way intersection
557        p = Pattern.compile("[[a-p]&&[g-z]&&[d-k]]");
558        m = p.matcher("g");
559        assertTrue(m.matches());
560        m = p.matcher("m");
561        assertFalse(m.matches());
562
563        // Test nested intersection
564        p = Pattern.compile("[[[a-p]&&[g-z]]&&[d-k]]");
565        m = p.matcher("g");
566        assertTrue(m.matches());
567        m = p.matcher("m");
568        assertFalse(m.matches());
569
570        // Test character class subtraction with && and ^
571        p = Pattern.compile("[[a-z]&&[^aeiou]][aeiou][[^xyz]&&[a-z]]");
572        m = p.matcher("pop");
573        assertTrue(m.matches());
574        m = p.matcher("tag");
575        assertTrue(m.matches());
576        m = p.matcher("eat");
577        assertFalse(m.matches());
578        m = p.matcher("tax");
579        assertFalse(m.matches());
580        m = p.matcher("zip");
581        assertTrue(m.matches());
582
583        // Test . (DOT), with and without DOTALL
584        // Note: DOT not allowed in character classes
585        p = Pattern.compile(".+/x.z");
586        m = p.matcher("!$/xyz");
587        assertTrue(m.matches());
588        m = p.matcher("%\n\r/x\nz");
589        assertFalse(m.matches());
590        p = Pattern.compile(".+/x.z", Pattern.DOTALL);
591        m = p.matcher("%\n\r/x\nz");
592        assertTrue(m.matches());
593
594        // Test \d (digit)
595        p = Pattern.compile("\\d+[a-z][\\dx]");
596        m = p.matcher("42a6");
597        assertTrue(m.matches());
598        m = p.matcher("21zx");
599        assertTrue(m.matches());
600        m = p.matcher("ab6");
601        assertFalse(m.matches());
602        m = p.matcher("56912f9");
603        assertTrue(m.matches());
604
605        // Test \D (not a digit)
606        p = Pattern.compile("\\D+[a-z]-[\\D3]");
607        m = p.matcher("za-p");
608        assertTrue(m.matches());
609        m = p.matcher("%!e-3");
610        assertTrue(m.matches());
611        m = p.matcher("9a-x");
612        assertFalse(m.matches());
613        m = p.matcher("\u1234pp\ny-3");
614        assertTrue(m.matches());
615
616        // Test \s (whitespace)
617        p = Pattern.compile("<[a-zA-Z]+\\s+[0-9]+[\\sx][^\\s]>");
618        m = p.matcher("<cat \t1\fx>");
619        assertTrue(m.matches());
620        m = p.matcher("<cat \t1\f >");
621        assertFalse(m.matches());
622        m = p
623                .matcher("xyz <foo\n\r22 5> <pp \t\n\f\r \u000b41x\u1234><pp \nx7\rc> zzz");
624        assertTrue(m.find());
625        assertTrue(m.find());
626        assertFalse(m.find());
627
628        // Test \S (not whitespace)
629        p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221>");
630        m = p.matcher("<f $0**\n** 221>");
631        assertTrue(m.matches());
632        m = p.matcher("<x 441\t221>");
633        assertTrue(m.matches());
634        m = p.matcher("<z \t9\ng 221>");
635        assertFalse(m.matches());
636        m = p.matcher("<z 60\ngg\u1234\f221>");
637        assertTrue(m.matches());
638        p = Pattern.compile("<[a-z] \\S[0-9][\\S\n]+[^\\S]221[\\S&&[^abc]]>");
639        m = p.matcher("<f $0**\n** 221x>");
640        assertTrue(m.matches());
641        m = p.matcher("<x 441\t221z>");
642        assertTrue(m.matches());
643        m = p.matcher("<x 441\t221 >");
644        assertFalse(m.matches());
645        m = p.matcher("<x 441\t221c>");
646        assertFalse(m.matches());
647        m = p.matcher("<z \t9\ng 221x>");
648        assertFalse(m.matches());
649        m = p.matcher("<z 60\ngg\u1234\f221\u0001>");
650        assertTrue(m.matches());
651
652        // Test \w (ascii word)
653        p = Pattern.compile("<\\w+\\s[0-9]+;[^\\w]\\w+/[\\w$]+;");
654        m = p.matcher("<f1 99;!foo5/a$7;");
655        assertTrue(m.matches());
656        m = p.matcher("<f$ 99;!foo5/a$7;");
657        assertFalse(m.matches());
658        m = p
659                .matcher("<abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789 99;!foo5/a$7;");
660        assertTrue(m.matches());
661
662        // Test \W (not an ascii word)
663        p = Pattern.compile("<\\W\\w+\\s[0-9]+;[\\W_][^\\W]+\\s[0-9]+;");
664        m = p.matcher("<$foo3\n99;_bar\t0;");
665        assertTrue(m.matches());
666        m = p.matcher("<hh 99;_g 0;");
667        assertFalse(m.matches());
668        m = p.matcher("<*xx\t00;^zz\f11;");
669        assertTrue(m.matches());
670
671        // Test x|y pattern
672        // TODO
673    }
674
675    public void testPOSIXGroups() throws PatternSyntaxException {
676        Pattern p;
677        Matcher m;
678
679        // Test POSIX groups using \p and \P (in the group and not in the group)
680        // Groups are Lower, Upper, ASCII, Alpha, Digit, XDigit, Alnum, Punct,
681        // Graph, Print, Blank, Space, Cntrl
682        // Test \p{Lower}
683        /*
684         * FIXME: Requires complex range processing p = Pattern.compile("<\\p{Lower}\\d\\P{Lower}:[\\p{Lower}Z]\\s[^\\P{Lower}]>");
685         * m = p.matcher("<a4P:g x>"); assertTrue(m.matches()); m = p.matcher("<p4%:Z\tq>");
686         * assertTrue(m.matches()); m = p.matcher("<A6#:e e>");
687         * assertFalse(m.matches());
688         */
689        p = Pattern.compile("\\p{Lower}+");
690        m = p.matcher("abcdefghijklmnopqrstuvwxyz");
691        assertTrue(m.matches());
692
693        // Invalid uses of \p{Lower}
694        try {
695            p = Pattern.compile("\\p");
696            fail("PatternSyntaxException expected");
697        } catch (PatternSyntaxException e) {
698        }
699
700        try {
701            p = Pattern.compile("\\p;");
702            fail("PatternSyntaxException expected");
703        } catch (PatternSyntaxException e) {
704        }
705
706        try {
707            p = Pattern.compile("\\p{");
708            fail("PatternSyntaxException expected");
709        } catch (PatternSyntaxException e) {
710        }
711
712        try {
713            p = Pattern.compile("\\p{;");
714            fail("PatternSyntaxException expected");
715        } catch (PatternSyntaxException e) {
716        }
717
718        try {
719            p = Pattern.compile("\\p{Lower");
720            fail("PatternSyntaxException expected");
721        } catch (PatternSyntaxException e) {
722        }
723
724        try {
725            p = Pattern.compile("\\p{Lower;");
726            fail("PatternSyntaxException expected");
727        } catch (PatternSyntaxException e) {
728        }
729
730        // Test \p{Upper}
731        /*
732         * FIXME: Requires complex range processing p = Pattern.compile("<\\p{Upper}\\d\\P{Upper}:[\\p{Upper}z]\\s[^\\P{Upper}]>");
733         * m = p.matcher("<A4p:G X>"); assertTrue(m.matches()); m = p.matcher("<P4%:z\tQ>");
734         * assertTrue(m.matches()); m = p.matcher("<a6#:E E>");
735         * assertFalse(m.matches());
736         */
737        p = Pattern.compile("\\p{Upper}+");
738        m = p.matcher("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
739        assertTrue(m.matches());
740
741        // Invalid uses of \p{Upper}
742        try {
743            p = Pattern.compile("\\p{Upper");
744            fail("PatternSyntaxException expected");
745        } catch (PatternSyntaxException e) {
746        }
747
748        try {
749            p = Pattern.compile("\\p{Upper;");
750            fail("PatternSyntaxException expected");
751        } catch (PatternSyntaxException e) {
752        }
753
754        // Test \p{ASCII}
755        /*
756         * FIXME: Requires complex range processing p = Pattern.compile("<\\p{ASCII}\\d\\P{ASCII}:[\\p{ASCII}\u1234]\\s[^\\P{ASCII}]>");
757         * m = p.matcher("<A4\u0080:G X>"); assertTrue(m.matches()); m =
758         * p.matcher("<P4\u00ff:\u1234\t\n>"); assertTrue(m.matches()); m =
759         * p.matcher("<\u00846#:E E>"); assertFalse(m.matches())
760         */
761        int i;
762        p = Pattern.compile("\\p{ASCII}");
763        for (i = 0; i < 0x80; i++) {
764            m = p.matcher(Character.toString((char) i));
765            assertTrue(m.matches());
766        }
767        for (; i < 0xff; i++) {
768            m = p.matcher(Character.toString((char) i));
769            assertFalse(m.matches());
770        }
771
772        // Invalid uses of \p{ASCII}
773        try {
774            p = Pattern.compile("\\p{ASCII");
775            fail("PatternSyntaxException expected");
776        } catch (PatternSyntaxException e) {
777        }
778
779        try {
780            p = Pattern.compile("\\p{ASCII;");
781            fail("PatternSyntaxException expected");
782        } catch (PatternSyntaxException e) {
783        }
784
785        // Test \p{Alpha}
786        // TODO
787
788        // Test \p{Digit}
789        // TODO
790
791        // Test \p{XDigit}
792        // TODO
793
794        // Test \p{Alnum}
795        // TODO
796
797        // Test \p{Punct}
798        // TODO
799
800        // Test \p{Graph}
801        // TODO
802
803        // Test \p{Print}
804        // TODO
805
806        // Test \p{Blank}
807        // TODO
808
809        // Test \p{Space}
810        // TODO
811
812        // Test \p{Cntrl}
813        // TODO
814    }
815
816    public void testUnicodeBlocks() throws PatternSyntaxException {
817        Pattern p;
818        Matcher m;
819        int i, j;
820
821        // Test Unicode blocks using \p and \P
822        // FIXME:
823        // Note that LatinExtended-B and ArabicPresentations-B are unrecognized
824        // by the reference JDK.
825        for (i = 0; i < UBlocks.length; i++) {
826            /*
827             * p = Pattern.compile("\\p{"+UBlocks[i].name+"}");
828             *
829             * if (UBlocks[i].low > 0) { m =
830             * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
831             * assertFalse(m.matches()); } for (j=UBlocks[i].low; j <=
832             * UBlocks[i].high; j++) { m =
833             * p.matcher(Character.toString((char)j)); assertTrue(m.matches()); }
834             * if (UBlocks[i].high < 0xFFFF) { m =
835             * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
836             * assertFalse(m.matches()); }
837             *
838             * p = Pattern.compile("\\P{"+UBlocks[i].name+"}");
839             *
840             * if (UBlocks[i].low > 0) { m =
841             * p.matcher(Character.toString((char)(UBlocks[i].low-1)));
842             * assertTrue(m.matches()); } for (j=UBlocks[i].low; j <
843             * UBlocks[i].high; j++) { m =
844             * p.matcher(Character.toString((char)j)); assertFalse(m.matches()); }
845             * if (UBlocks[i].high < 0xFFFF) { m =
846             * p.matcher(Character.toString((char)(UBlocks[i].high+1)));
847             * assertTrue(m.matches()); }
848             */
849
850            p = Pattern.compile("\\p{In" + UBlocks[i].name + "}");
851
852            if (UBlocks[i].low > 0) {
853                m = p.matcher(Character.toString((char) (UBlocks[i].low - 1)));
854                assertFalse(UBlocks[i].name, m.matches());
855            }
856            for (j = UBlocks[i].low; j <= UBlocks[i].high; j++) {
857                m = p.matcher(Character.toString((char) j));
858                assertTrue(UBlocks[i].name, m.matches());
859            }
860            if (UBlocks[i].high < 0xFFFF) {
861                m = p.matcher(Character.toString((char) (UBlocks[i].high + 1)));
862                assertFalse(UBlocks[i].name, m.matches());
863            }
864
865            p = Pattern.compile("\\P{In" + UBlocks[i].name + "}");
866
867            if (UBlocks[i].low > 0) {
868                m = p.matcher(Character.toString((char) (UBlocks[i].low - 1)));
869                assertTrue(UBlocks[i].name, m.matches());
870            }
871            for (j = UBlocks[i].low; j < UBlocks[i].high; j++) {
872                m = p.matcher(Character.toString((char) j));
873                assertFalse(UBlocks[i].name, m.matches());
874            }
875            if (UBlocks[i].high < 0xFFFF) {
876                m = p.matcher(Character.toString((char) (UBlocks[i].high + 1)));
877                assertTrue(UBlocks[i].name, m.matches());
878            }
879        }
880    }
881
882    public void testMisc() throws PatternSyntaxException {
883        Pattern p;
884        Matcher m;
885
886        // Test (?>...)
887        // TODO
888
889        // Test (?onflags-offflags)
890        // Valid flags are i,m,d,s,u,x
891        // TODO
892
893        // Test (?onflags-offflags:...)
894        // TODO
895
896        // Test \Q, \E
897        p = Pattern.compile("[a-z]+;\\Q[a-z]+;\\Q(foo.*);\\E[0-9]+");
898        m = p.matcher("abc;[a-z]+;\\Q(foo.*);411");
899        assertTrue(m.matches());
900        m = p.matcher("abc;def;foo42;555");
901        assertFalse(m.matches());
902        m = p.matcher("abc;\\Qdef;\\Qfoo99;\\E123");
903        assertFalse(m.matches());
904
905        p = Pattern.compile("[a-z]+;(foo[0-9]-\\Q(...)\\E);[0-9]+");
906        m = p.matcher("abc;foo5-(...);123");
907        assertTrue(m.matches());
908        assertEquals("foo5-(...)", m.group(1));
909        m = p.matcher("abc;foo9-(xxx);789");
910        assertFalse(m.matches());
911
912        p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q$-\\E]+);[0-9]+");
913        m = p.matcher("abc;bar0-def$-;123");
914        assertTrue(m.matches());
915
916        // FIXME:
917        // This should work the same as the pattern above but fails with the
918        // the reference JDK
919        p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q-$\\E]+);[0-9]+");
920        m = p.matcher("abc;bar0-def$-;123");
921        // assertTrue(m.matches());
922
923        // FIXME:
924        // This should work too .. it looks as if just about anything that
925        // has more
926        // than one character between \Q and \E is broken in the the reference
927        // JDK
928        p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\Q[0-9]\\E]+);[0-9]+");
929        m = p.matcher("abc;bar0-def[99]-]0x[;123");
930        // assertTrue(m.matches());
931
932        // This is the same as above but with explicit escapes .. and this
933        // does work
934        // on the the reference JDK
935        p = Pattern.compile("[a-z]+;(bar[0-9]-[a-z\\[0\\-9\\]]+);[0-9]+");
936        m = p.matcher("abc;bar0-def[99]-]0x[;123");
937        assertTrue(m.matches());
938
939        // Test #<comment text>
940        // TODO
941    }
942
943    public void testCompile1() throws PatternSyntaxException {
944        Pattern pattern = Pattern
945                .compile("[0-9A-Za-z][0-9A-Za-z\\x2e\\x3a\\x2d\\x5f]*");
946        String name = "iso-8859-1";
947        assertTrue(pattern.matcher(name).matches());
948    }
949
950    public void testCompile2() throws PatternSyntaxException {
951        String findString = "\\Qimport\\E";
952
953        Pattern pattern = Pattern.compile(findString, 0);
954        Matcher matcher = pattern.matcher(new String(
955                "import a.A;\n\n import b.B;\nclass C {}"));
956
957        assertTrue(matcher.find(0));
958    }
959
960    public void testCompile3() throws PatternSyntaxException {
961        Pattern p;
962        Matcher m;
963        p = Pattern.compile("a$");
964        m = p.matcher("a\n");
965        assertTrue(m.find());
966        assertEquals("a", m.group());
967        assertFalse(m.find());
968
969        p = Pattern.compile("(a$)");
970        m = p.matcher("a\n");
971        assertTrue(m.find());
972        assertEquals("a", m.group());
973        assertEquals("a", m.group(1));
974        assertFalse(m.find());
975
976        p = Pattern.compile("^.*$", Pattern.MULTILINE);
977
978        m = p.matcher("a\n");
979        assertTrue(m.find());
980        // System.out.println("["+m.group()+"]");
981        assertEquals("a", m.group());
982        assertFalse(m.find());
983
984        m = p.matcher("a\nb\n");
985        assertTrue(m.find());
986        // System.out.println("["+m.group()+"]");
987        assertEquals("a", m.group());
988        assertTrue(m.find());
989        // System.out.println("["+m.group()+"]");
990        assertEquals("b", m.group());
991        assertFalse(m.find());
992
993        m = p.matcher("a\nb");
994        assertTrue(m.find());
995        // System.out.println("["+m.group()+"]");
996        assertEquals("a", m.group());
997        assertTrue(m.find());
998        assertEquals("b", m.group());
999        assertFalse(m.find());
1000
1001        m = p.matcher("\naa\r\nbb\rcc\n\n");
1002        assertTrue(m.find());
1003        // System.out.println("["+m.group()+"]");
1004        assertTrue(m.group().equals(""));
1005        assertTrue(m.find());
1006        // System.out.println("["+m.group()+"]");
1007        assertEquals("aa", m.group());
1008        assertTrue(m.find());
1009        // System.out.println("["+m.group()+"]");
1010        assertEquals("bb", m.group());
1011        assertTrue(m.find());
1012        // System.out.println("["+m.group()+"]");
1013        assertEquals("cc", m.group());
1014        assertTrue(m.find());
1015        // System.out.println("["+m.group()+"]");
1016        assertTrue(m.group().equals(""));
1017        assertFalse(m.find());
1018
1019        m = p.matcher("a");
1020        assertTrue(m.find());
1021        assertEquals("a", m.group());
1022        assertFalse(m.find());
1023
1024        m = p.matcher("");
1025        // This differs from the RI behaviour but seems more correct.
1026        assertTrue(m.find());
1027        assertTrue(m.group().equals(""));
1028        assertFalse(m.find());
1029
1030        p = Pattern.compile("^.*$");
1031        m = p.matcher("");
1032        assertTrue(m.find());
1033        assertTrue(m.group().equals(""));
1034        assertFalse(m.find());
1035    }
1036
1037    public void testCompile4() throws PatternSyntaxException {
1038        String findString = "\\Qpublic\\E";
1039        StringBuffer text = new StringBuffer("    public class Class {\n"
1040                + "    public class Class {");
1041
1042        Pattern pattern = Pattern.compile(findString, 0);
1043        Matcher matcher = pattern.matcher(text);
1044
1045        boolean found = matcher.find();
1046        assertTrue(found);
1047        assertEquals(4, matcher.start());
1048        if (found) {
1049            // modify text
1050            text.delete(0, text.length());
1051            text.append("Text have been changed.");
1052            matcher.reset(text);
1053        }
1054
1055        found = matcher.find();
1056        assertFalse(found);
1057    }
1058
1059    public void testCompile5() throws PatternSyntaxException {
1060        Pattern p = Pattern.compile("^[0-9]");
1061        String s[] = p.split("12", -1);
1062        assertEquals("", s[0]);
1063        assertEquals("2", s[1]);
1064        assertEquals(2, s.length);
1065    }
1066
1067    // public void testCompile6() {
1068    // String regex = "[\\p{L}[\\p{Mn}[\\p{Pc}[\\p{Nd}[\\p{Nl}[\\p{Sc}]]]]]]+";
1069    // String regex = "[\\p{L}\\p{Mn}\\p{Pc}\\p{Nd}\\p{Nl}\\p{Sc}]+";
1070    // try {
1071    // Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
1072    // assertTrue(true);
1073    // } catch (PatternSyntaxException e) {
1074    // System.out.println(e.getMessage());
1075    // assertTrue(false);
1076    // }
1077    // }
1078
1079    private static class UBInfo {
1080        public UBInfo(int low, int high, String name) {
1081            this.name = name;
1082            this.low = low;
1083            this.high = high;
1084        }
1085
1086        public String name;
1087
1088        public int low, high;
1089    }
1090
1091    // A table representing the unicode categories
1092    // private static UBInfo[] UCategories = {
1093    // Lu
1094    // Ll
1095    // Lt
1096    // Lm
1097    // Lo
1098    // Mn
1099    // Mc
1100    // Me
1101    // Nd
1102    // Nl
1103    // No
1104    // Pc
1105    // Pd
1106    // Ps
1107    // Pe
1108    // Pi
1109    // Pf
1110    // Po
1111    // Sm
1112    // Sc
1113    // Sk
1114    // So
1115    // Zs
1116    // Zl
1117    // Zp
1118    // Cc
1119    // Cf
1120    // Cs
1121    // Co
1122    // Cn
1123    // };
1124
    // A table representing the unicode character blocks.
    //
    // Each entry holds the first and last char value of a block (both
    // inclusive) and the block name with spaces removed; the block test
    // prepends "In" to that name to build \p{In<name>} and \P{In<name>}
    // patterns. The comment above each entry has the form
    // "first; last; block name". Blocks that appear only in a comment (for
    // example the surrogate and private use ranges) have no entry and are
    // therefore not tested.
    //
    // NOTE(review): for CJK Unified Ideographs Extension A, Hangul Syllables,
    // Arabic Presentation Forms-B and Specials the end value in the comment
    // differs from the one in the entry; the tests use the entry's value.
    // Presumably the comments give the last assigned character while the
    // entries give the full block extent - confirm.
    private static UBInfo[] UBlocks = {
    /* 0000; 007F; Basic Latin */
    new UBInfo(0x0000, 0x007F, "BasicLatin"), // Character.UnicodeBlock.BASIC_LATIN
            /* 0080; 00FF; Latin-1 Supplement */
            new UBInfo(0x0080, 0x00FF, "Latin-1Supplement"), // Character.UnicodeBlock.LATIN_1_SUPPLEMENT
            /* 0100; 017F; Latin Extended-A */
            new UBInfo(0x0100, 0x017F, "LatinExtended-A"), // Character.UnicodeBlock.LATIN_EXTENDED_A
            /* 0180; 024F; Latin Extended-B */
            // new UBInfo (0x0180,0x024F,"InLatinExtended-B"), //
            // Character.UnicodeBlock.LATIN_EXTENDED_B
            /* 0250; 02AF; IPA Extensions */
            new UBInfo(0x0250, 0x02AF, "IPAExtensions"), // Character.UnicodeBlock.IPA_EXTENSIONS
            /* 02B0; 02FF; Spacing Modifier Letters */
            new UBInfo(0x02B0, 0x02FF, "SpacingModifierLetters"), // Character.UnicodeBlock.SPACING_MODIFIER_LETTERS
            /* 0300; 036F; Combining Diacritical Marks */
            new UBInfo(0x0300, 0x036F, "CombiningDiacriticalMarks"), // Character.UnicodeBlock.COMBINING_DIACRITICAL_MARKS
            /* 0370; 03FF; Greek */
            new UBInfo(0x0370, 0x03FF, "Greek"), // Character.UnicodeBlock.GREEK
            /* 0400; 04FF; Cyrillic */
            new UBInfo(0x0400, 0x04FF, "Cyrillic"), // Character.UnicodeBlock.CYRILLIC
            /* 0530; 058F; Armenian */
            new UBInfo(0x0530, 0x058F, "Armenian"), // Character.UnicodeBlock.ARMENIAN
            /* 0590; 05FF; Hebrew */
            new UBInfo(0x0590, 0x05FF, "Hebrew"), // Character.UnicodeBlock.HEBREW
            /* 0600; 06FF; Arabic */
            new UBInfo(0x0600, 0x06FF, "Arabic"), // Character.UnicodeBlock.ARABIC
            /* 0700; 074F; Syriac */
            new UBInfo(0x0700, 0x074F, "Syriac"), // Character.UnicodeBlock.SYRIAC
            /* 0780; 07BF; Thaana */
            new UBInfo(0x0780, 0x07BF, "Thaana"), // Character.UnicodeBlock.THAANA
            /* 0900; 097F; Devanagari */
            new UBInfo(0x0900, 0x097F, "Devanagari"), // Character.UnicodeBlock.DEVANAGARI
            /* 0980; 09FF; Bengali */
            new UBInfo(0x0980, 0x09FF, "Bengali"), // Character.UnicodeBlock.BENGALI
            /* 0A00; 0A7F; Gurmukhi */
            new UBInfo(0x0A00, 0x0A7F, "Gurmukhi"), // Character.UnicodeBlock.GURMUKHI
            /* 0A80; 0AFF; Gujarati */
            new UBInfo(0x0A80, 0x0AFF, "Gujarati"), // Character.UnicodeBlock.GUJARATI
            /* 0B00; 0B7F; Oriya */
            new UBInfo(0x0B00, 0x0B7F, "Oriya"), // Character.UnicodeBlock.ORIYA
            /* 0B80; 0BFF; Tamil */
            new UBInfo(0x0B80, 0x0BFF, "Tamil"), // Character.UnicodeBlock.TAMIL
            /* 0C00; 0C7F; Telugu */
            new UBInfo(0x0C00, 0x0C7F, "Telugu"), // Character.UnicodeBlock.TELUGU
            /* 0C80; 0CFF; Kannada */
            new UBInfo(0x0C80, 0x0CFF, "Kannada"), // Character.UnicodeBlock.KANNADA
            /* 0D00; 0D7F; Malayalam */
            new UBInfo(0x0D00, 0x0D7F, "Malayalam"), // Character.UnicodeBlock.MALAYALAM
            /* 0D80; 0DFF; Sinhala */
            new UBInfo(0x0D80, 0x0DFF, "Sinhala"), // Character.UnicodeBlock.SINHALA
            /* 0E00; 0E7F; Thai */
            new UBInfo(0x0E00, 0x0E7F, "Thai"), // Character.UnicodeBlock.THAI
            /* 0E80; 0EFF; Lao */
            new UBInfo(0x0E80, 0x0EFF, "Lao"), // Character.UnicodeBlock.LAO
            /* 0F00; 0FFF; Tibetan */
            new UBInfo(0x0F00, 0x0FFF, "Tibetan"), // Character.UnicodeBlock.TIBETAN
            /* 1000; 109F; Myanmar */
            new UBInfo(0x1000, 0x109F, "Myanmar"), // Character.UnicodeBlock.MYANMAR
            /* 10A0; 10FF; Georgian */
            new UBInfo(0x10A0, 0x10FF, "Georgian"), // Character.UnicodeBlock.GEORGIAN
            /* 1100; 11FF; Hangul Jamo */
            new UBInfo(0x1100, 0x11FF, "HangulJamo"), // Character.UnicodeBlock.HANGUL_JAMO
            /* 1200; 137F; Ethiopic */
            new UBInfo(0x1200, 0x137F, "Ethiopic"), // Character.UnicodeBlock.ETHIOPIC
            /* 13A0; 13FF; Cherokee */
            new UBInfo(0x13A0, 0x13FF, "Cherokee"), // Character.UnicodeBlock.CHEROKEE
            /* 1400; 167F; Unified Canadian Aboriginal Syllabics */
            new UBInfo(0x1400, 0x167F, "UnifiedCanadianAboriginalSyllabics"), // Character.UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
            /* 1680; 169F; Ogham */
            new UBInfo(0x1680, 0x169F, "Ogham"), // Character.UnicodeBlock.OGHAM
            /* 16A0; 16FF; Runic */
            new UBInfo(0x16A0, 0x16FF, "Runic"), // Character.UnicodeBlock.RUNIC
            /* 1780; 17FF; Khmer */
            new UBInfo(0x1780, 0x17FF, "Khmer"), // Character.UnicodeBlock.KHMER
            /* 1800; 18AF; Mongolian */
            new UBInfo(0x1800, 0x18AF, "Mongolian"), // Character.UnicodeBlock.MONGOLIAN
            /* 1E00; 1EFF; Latin Extended Additional */
            new UBInfo(0x1E00, 0x1EFF, "LatinExtendedAdditional"), // Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
            /* 1F00; 1FFF; Greek Extended */
            new UBInfo(0x1F00, 0x1FFF, "GreekExtended"), // Character.UnicodeBlock.GREEK_EXTENDED
            /* 2000; 206F; General Punctuation */
            new UBInfo(0x2000, 0x206F, "GeneralPunctuation"), // Character.UnicodeBlock.GENERAL_PUNCTUATION
            /* 2070; 209F; Superscripts and Subscripts */
            new UBInfo(0x2070, 0x209F, "SuperscriptsandSubscripts"), // Character.UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS
            /* 20A0; 20CF; Currency Symbols */
            new UBInfo(0x20A0, 0x20CF, "CurrencySymbols"), // Character.UnicodeBlock.CURRENCY_SYMBOLS
            /* 20D0; 20FF; Combining Marks for Symbols */
            new UBInfo(0x20D0, 0x20FF, "CombiningMarksforSymbols"), // Character.UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS
            /* 2100; 214F; Letterlike Symbols */
            new UBInfo(0x2100, 0x214F, "LetterlikeSymbols"), // Character.UnicodeBlock.LETTERLIKE_SYMBOLS
            /* 2150; 218F; Number Forms */
            new UBInfo(0x2150, 0x218F, "NumberForms"), // Character.UnicodeBlock.NUMBER_FORMS
            /* 2190; 21FF; Arrows */
            new UBInfo(0x2190, 0x21FF, "Arrows"), // Character.UnicodeBlock.ARROWS
            /* 2200; 22FF; Mathematical Operators */
            new UBInfo(0x2200, 0x22FF, "MathematicalOperators"), // Character.UnicodeBlock.MATHEMATICAL_OPERATORS
            /* 2300; 23FF; Miscellaneous Technical */
            new UBInfo(0x2300, 0x23FF, "MiscellaneousTechnical"), // Character.UnicodeBlock.MISCELLANEOUS_TECHNICAL
            /* 2400; 243F; Control Pictures */
            new UBInfo(0x2400, 0x243F, "ControlPictures"), // Character.UnicodeBlock.CONTROL_PICTURES
            /* 2440; 245F; Optical Character Recognition */
            new UBInfo(0x2440, 0x245F, "OpticalCharacterRecognition"), // Character.UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION
            /* 2460; 24FF; Enclosed Alphanumerics */
            new UBInfo(0x2460, 0x24FF, "EnclosedAlphanumerics"), // Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS
            /* 2500; 257F; Box Drawing */
            new UBInfo(0x2500, 0x257F, "BoxDrawing"), // Character.UnicodeBlock.BOX_DRAWING
            /* 2580; 259F; Block Elements */
            new UBInfo(0x2580, 0x259F, "BlockElements"), // Character.UnicodeBlock.BLOCK_ELEMENTS
            /* 25A0; 25FF; Geometric Shapes */
            new UBInfo(0x25A0, 0x25FF, "GeometricShapes"), // Character.UnicodeBlock.GEOMETRIC_SHAPES
            /* 2600; 26FF; Miscellaneous Symbols */
            new UBInfo(0x2600, 0x26FF, "MiscellaneousSymbols"), // Character.UnicodeBlock.MISCELLANEOUS_SYMBOLS
            /* 2700; 27BF; Dingbats */
            new UBInfo(0x2700, 0x27BF, "Dingbats"), // Character.UnicodeBlock.DINGBATS
            /* 2800; 28FF; Braille Patterns */
            new UBInfo(0x2800, 0x28FF, "BraillePatterns"), // Character.UnicodeBlock.BRAILLE_PATTERNS
            /* 2E80; 2EFF; CJK Radicals Supplement */
            new UBInfo(0x2E80, 0x2EFF, "CJKRadicalsSupplement"), // Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT
            /* 2F00; 2FDF; Kangxi Radicals */
            new UBInfo(0x2F00, 0x2FDF, "KangxiRadicals"), // Character.UnicodeBlock.KANGXI_RADICALS
            /* 2FF0; 2FFF; Ideographic Description Characters */
            new UBInfo(0x2FF0, 0x2FFF, "IdeographicDescriptionCharacters"), // Character.UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS
            /* 3000; 303F; CJK Symbols and Punctuation */
            new UBInfo(0x3000, 0x303F, "CJKSymbolsandPunctuation"), // Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
            /* 3040; 309F; Hiragana */
            new UBInfo(0x3040, 0x309F, "Hiragana"), // Character.UnicodeBlock.HIRAGANA
            /* 30A0; 30FF; Katakana */
            new UBInfo(0x30A0, 0x30FF, "Katakana"), // Character.UnicodeBlock.KATAKANA
            /* 3100; 312F; Bopomofo */
            new UBInfo(0x3100, 0x312F, "Bopomofo"), // Character.UnicodeBlock.BOPOMOFO
            /* 3130; 318F; Hangul Compatibility Jamo */
            new UBInfo(0x3130, 0x318F, "HangulCompatibilityJamo"), // Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
            /* 3190; 319F; Kanbun */
            new UBInfo(0x3190, 0x319F, "Kanbun"), // Character.UnicodeBlock.KANBUN
            /* 31A0; 31BF; Bopomofo Extended */
            new UBInfo(0x31A0, 0x31BF, "BopomofoExtended"), // Character.UnicodeBlock.BOPOMOFO_EXTENDED
            /* 3200; 32FF; Enclosed CJK Letters and Months */
            new UBInfo(0x3200, 0x32FF, "EnclosedCJKLettersandMonths"), // Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS
            /* 3300; 33FF; CJK Compatibility */
            new UBInfo(0x3300, 0x33FF, "CJKCompatibility"), // Character.UnicodeBlock.CJK_COMPATIBILITY
            /* 3400; 4DB5; CJK Unified Ideographs Extension A */
            new UBInfo(0x3400, 0x4DBF, "CJKUnifiedIdeographsExtensionA"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            /* 4E00; 9FFF; CJK Unified Ideographs */
            new UBInfo(0x4E00, 0x9FFF, "CJKUnifiedIdeographs"), // Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            /* A000; A48F; Yi Syllables */
            new UBInfo(0xA000, 0xA48F, "YiSyllables"), // Character.UnicodeBlock.YI_SYLLABLES
            /* A490; A4CF; Yi Radicals */
            new UBInfo(0xA490, 0xA4CF, "YiRadicals"), // Character.UnicodeBlock.YI_RADICALS
            /* AC00; D7A3; Hangul Syllables */
            new UBInfo(0xAC00, 0xD7AF, "HangulSyllables"), // Character.UnicodeBlock.HANGUL_SYLLABLES
            /* D800; DB7F; High Surrogates */
            /* DB80; DBFF; High Private Use Surrogates */
            /* DC00; DFFF; Low Surrogates */
            /* E000; F8FF; Private Use */
            /* F900; FAFF; CJK Compatibility Ideographs */
            new UBInfo(0xF900, 0xFAFF, "CJKCompatibilityIdeographs"), // Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            /* FB00; FB4F; Alphabetic Presentation Forms */
            new UBInfo(0xFB00, 0xFB4F, "AlphabeticPresentationForms"), // Character.UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS
            /* FB50; FDFF; Arabic Presentation Forms-A */
            new UBInfo(0xFB50, 0xFDFF, "ArabicPresentationForms-A"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_A
            /* FE20; FE2F; Combining Half Marks */
            new UBInfo(0xFE20, 0xFE2F, "CombiningHalfMarks"), // Character.UnicodeBlock.COMBINING_HALF_MARKS
            /* FE30; FE4F; CJK Compatibility Forms */
            new UBInfo(0xFE30, 0xFE4F, "CJKCompatibilityForms"), // Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
            /* FE50; FE6F; Small Form Variants */
            new UBInfo(0xFE50, 0xFE6F, "SmallFormVariants"), // Character.UnicodeBlock.SMALL_FORM_VARIANTS
            /* FE70; FEFE; Arabic Presentation Forms-B */
            new UBInfo(0xFE70, 0xFEFF, "ArabicPresentationForms-B"), // Character.UnicodeBlock.ARABIC_PRESENTATION_FORMS_B
            /* FF00; FFEF; Halfwidth and Fullwidth Forms */
            new UBInfo(0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"), // Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
            /* FFF0; FFFD; Specials */
            new UBInfo(0xFFF0, 0xFFFF, "Specials") // Character.UnicodeBlock.SPECIALS
    };
1299}
1300