1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package org.apache.harmony.regex.tests.java.util.regex;
19
20import java.io.Serializable;
21import java.util.regex.Matcher;
22import java.util.regex.Pattern;
23import java.util.regex.PatternSyntaxException;
24
25import junit.framework.TestCase;
26
27import org.apache.harmony.testframework.serialization.SerializationTest;
28import org.apache.harmony.testframework.serialization.SerializationTest.SerializableAssert;
29
30public class PatternTest extends TestCase {
    // Well-formed regular expressions shared by testPattern(), testCompile()
    // and testMatches(). testMatches() pairs entry i of this array with entry
    // i of its local posSeq table, so the order of the entries matters.
    String[] testPatterns = {
            "(a|b)*abb",
            "(1*2*3*4*)*567",
            "(a|b|c|d)*aab",
            "(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*",
            "(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*",
            "(a|b)*(a|b)*A(a|b)*lice.*",
            "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|"
                    + "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do",
// BEGIN Android-changed
// We don't have canonical equivalence.
//            "x(?c)y", "x(?cc)y"
//            "x(?:c)y"
// END Android-changed

    };
47
    // Additional well-formed patterns (escapes, POSIX/java/Unicode character
    // classes) used by testCompile(), testFlagsCompileDefault() and
    // testFlagsCompileValid().
    String[] testPatternsAlt = {
            /*
             * According to the Javadoc, two- and three-digit octal sequences
             * such as \\o70\\o347 should be accepted, but the test fails for
             * them.
             */
            "[ab]\\b\\\\o5\\xF9\\u1E7B\\t\\n\\f\\r\\a\\e[yz]",
            "^\\p{Lower}*\\p{Upper}*\\p{ASCII}?\\p{Alpha}?\\p{Digit}*\\p{Alnum}\\p{Punct}\\p{Graph}\\p{Print}\\p{Blank}\\p{Cntrl}\\p{XDigit}\\p{Space}",
            "$\\p{javaLowerCase}\\p{javaUpperCase}\\p{javaWhitespace}\\p{javaMirrored}",
            "\\p{InGreek}\\p{Lu}\\p{Sc}\\P{InGreek}[\\p{L}&&[^\\p{Lu}]]" };
57
    // Malformed patterns; testCompile() and testMatchesException() expect a
    // PatternSyntaxException for each of them.
    String[] wrongTestPatterns = { "\\o9A", "\\p{Lawer}", "\\xG0" };

    // Single compile flags that testFlagsCompileValid() passes one at a time.
    final static int[] flagsSet = { Pattern.CASE_INSENSITIVE,
            Pattern.MULTILINE, Pattern.DOTALL, Pattern.UNICODE_CASE
            /* , Pattern.CANON_EQ */ };

    /*
     * Based on the RI implementation documents. This set needs to be checked
     * against the actual implementation.
     */
    final static int[] wrongFlagsSet = { 256, 512, 1024 };

    // Flags value testFlagsCompileDefault() expects when none are passed.
    final static int DEFAULT_FLAGS = 0;
71
72    public void testMatcher() {
73        // some very simple test
74        Pattern p = Pattern.compile("a");
75        assertNotNull(p.matcher("bcde"));
76        assertNotSame(p.matcher("a"), p.matcher("a"));
77    }
78
79    public void testSplitCharSequenceInt() {
80        // splitting CharSequence which ends with pattern
81        // bug6193
82        assertEquals(",,".split(",", 3).length, 3);
83        assertEquals(",,".split(",", 4).length, 3);
84        // bug6193
85        // bug5391
86        assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5);
87        assertEquals(Pattern.compile("b").split("ab", -1).length, 2);
88        // bug5391
89        String s[];
90        Pattern pat = Pattern.compile("x");
91        s = pat.split("zxx:zzz:zxx", 10);
92        assertEquals(s.length, 5);
93        s = pat.split("zxx:zzz:zxx", 3);
94        assertEquals(s.length, 3);
95        s = pat.split("zxx:zzz:zxx", -1);
96        assertEquals(s.length, 5);
97        s = pat.split("zxx:zzz:zxx", 0);
98        assertEquals(s.length, 3);
99        // other splitting
100        // negative limit
101        pat = Pattern.compile("b");
102        s = pat.split("abccbadfebb", -1);
103        assertEquals(s.length, 5);
104        s = pat.split("", -1);
105        assertEquals(s.length, 1);
106        pat = Pattern.compile("");
107        s = pat.split("", -1);
108        assertEquals(s.length, 1);
109        s = pat.split("abccbadfe", -1);
110        assertEquals(s.length, 11);
111        // zero limit
112        pat = Pattern.compile("b");
113        s = pat.split("abccbadfebb", 0);
114        assertEquals(s.length, 3);
115        s = pat.split("", 0);
116        assertEquals(s.length, 1);
117        pat = Pattern.compile("");
118        s = pat.split("", 0);
119        assertEquals(s.length, 1);
120        s = pat.split("abccbadfe", 0);
121        assertEquals(s.length, 10);
122        // positive limit
123        pat = Pattern.compile("b");
124        s = pat.split("abccbadfebb", 12);
125        assertEquals(s.length, 5);
126        s = pat.split("", 6);
127        assertEquals(s.length, 1);
128        pat = Pattern.compile("");
129        s = pat.split("", 11);
130        assertEquals(s.length, 1);
131        s = pat.split("abccbadfe", 15);
132        assertEquals(s.length, 11);
133
134        pat = Pattern.compile("b");
135        s = pat.split("abccbadfebb", 5);
136        assertEquals(s.length, 5);
137        s = pat.split("", 1);
138        assertEquals(s.length, 1);
139        pat = Pattern.compile("");
140        s = pat.split("", 1);
141        assertEquals(s.length, 1);
142        s = pat.split("abccbadfe", 11);
143        assertEquals(s.length, 11);
144
145        pat = Pattern.compile("b");
146        s = pat.split("abccbadfebb", 3);
147        assertEquals(s.length, 3);
148        pat = Pattern.compile("");
149        s = pat.split("abccbadfe", 5);
150        assertEquals(s.length, 5);
151    }
152
153    public void testSplitCharSequence() {
154        String s[];
155        Pattern pat = Pattern.compile("b");
156        s = pat.split("abccbadfebb");
157        assertEquals(s.length, 3);
158        s = pat.split("");
159        assertEquals(s.length, 1);
160        pat = Pattern.compile("");
161        s = pat.split("");
162        assertEquals(s.length, 1);
163        s = pat.split("abccbadfe");
164        assertEquals(s.length, 10);
165        // bug6544
166        String s1 = "";
167        String[] arr = s1.split(":");
168        assertEquals(arr.length, 1);
169        // bug6544
170    }
171
172    public void testPattern() {
173        /* Positive assertion test. */
174        for (String aPattern : testPatterns) {
175            Pattern p = Pattern.compile(aPattern);
176            try {
177                assertTrue(p.pattern().equals(aPattern));
178            } catch (Exception e) {
179                fail("Unexpected exception: " + e);
180            }
181        }
182    }
183
184    public void testCompile() {
185        /* Positive assertion test. */
186        for (String aPattern : testPatterns) {
187            try {
188                Pattern p = Pattern.compile(aPattern);
189            } catch (Exception e) {
190                fail("Unexpected exception: " + e);
191            }
192        }
193
194        /* Positive assertion test with alternative templates. */
195        for (String aPattern : testPatternsAlt) {
196            try {
197                Pattern p = Pattern.compile(aPattern);
198            } catch (Exception e) {
199                fail("Unexpected exception: " + e);
200            }
201        }
202
203        /* Negative assertion test. */
204        for (String aPattern : wrongTestPatterns) {
205            try {
206                Pattern p = Pattern.compile(aPattern);
207                fail("PatternSyntaxException is expected");
208            } catch (PatternSyntaxException pse) {
209                /* OKAY */
210            } catch (Exception e) {
211                fail("Unexpected exception: " + e);
212            }
213        }
214    }
215
216    public void testFlags() {
217        String baseString;
218        String testString;
219        Pattern pat;
220        Matcher mat;
221
222        baseString = "((?i)|b)a";
223        testString = "A";
224        pat = Pattern.compile(baseString);
225        mat = pat.matcher(testString);
226        assertFalse(mat.matches());
227
228        baseString = "(?i)a|b";
229        testString = "A";
230        pat = Pattern.compile(baseString);
231        mat = pat.matcher(testString);
232        assertTrue(mat.matches());
233
234        baseString = "(?i)a|b";
235        testString = "B";
236        pat = Pattern.compile(baseString);
237        mat = pat.matcher(testString);
238        assertTrue(mat.matches());
239
240        baseString = "c|(?i)a|b";
241        testString = "B";
242        pat = Pattern.compile(baseString);
243        mat = pat.matcher(testString);
244        assertTrue(mat.matches());
245
246        baseString = "(?i)a|(?s)b";
247        testString = "B";
248        pat = Pattern.compile(baseString);
249        mat = pat.matcher(testString);
250        assertTrue(mat.matches());
251
252        baseString = "(?i)a|(?-i)b";
253        testString = "B";
254        pat = Pattern.compile(baseString);
255        mat = pat.matcher(testString);
256        assertFalse(mat.matches());
257
258        baseString = "(?i)a|(?-i)c|b";
259        testString = "B";
260        pat = Pattern.compile(baseString);
261        mat = pat.matcher(testString);
262        assertFalse(mat.matches());
263
264        baseString = "(?i)a|(?-i)c|(?i)b";
265        testString = "B";
266        pat = Pattern.compile(baseString);
267        mat = pat.matcher(testString);
268        assertTrue(mat.matches());
269
270        baseString = "(?i)a|(?-i)b";
271        testString = "A";
272        pat = Pattern.compile(baseString);
273        mat = pat.matcher(testString);
274        assertTrue(mat.matches());
275
276        baseString = "((?i))a";
277        testString = "A";
278        pat = Pattern.compile(baseString);
279        mat = pat.matcher(testString);
280        assertFalse(mat.matches());
281
282        baseString = "|(?i)|a";
283        testString = "A";
284        pat = Pattern.compile(baseString);
285        mat = pat.matcher(testString);
286        assertTrue(mat.matches());
287
288        baseString = "(?i)((?s)a.)";
289        testString = "A\n";
290        pat = Pattern.compile(baseString);
291        mat = pat.matcher(testString);
292        assertTrue(mat.matches());
293
294        baseString = "(?i)((?-i)a)";
295        testString = "A";
296        pat = Pattern.compile(baseString);
297        mat = pat.matcher(testString);
298        assertFalse(mat.matches());
299
300        baseString = "(?i)(?s:a.)";
301        testString = "A\n";
302        pat = Pattern.compile(baseString);
303        mat = pat.matcher(testString);
304        assertTrue(mat.matches());
305
306        baseString = "(?i)fgh(?s:aa)";
307        testString = "fghAA";
308        pat = Pattern.compile(baseString);
309        mat = pat.matcher(testString);
310        assertTrue(mat.matches());
311
312        baseString = "(?i)((?-i))a";
313        testString = "A";
314        pat = Pattern.compile(baseString);
315        mat = pat.matcher(testString);
316        assertTrue(mat.matches());
317
318        baseString = "abc(?i)d";
319        testString = "ABCD";
320        pat = Pattern.compile(baseString);
321        mat = pat.matcher(testString);
322        assertFalse(mat.matches());
323
324        testString = "abcD";
325        mat = pat.matcher(testString);
326        assertTrue(mat.matches());
327
328        baseString = "a(?i)a(?-i)a(?i)a(?-i)a";
329        testString = "aAaAa";
330        pat = Pattern.compile(baseString);
331        mat = pat.matcher(testString);
332        assertTrue(mat.matches());
333
334        testString = "aAAAa";
335        mat = pat.matcher(testString);
336        assertFalse(mat.matches());
337    }
338
339// BEGIN Android-removed
340// The flags() method should only return those flags that were explicitly
341// passed during the compilation. The JDK also accepts the ones implicitly
342// contained in the pattern, but ICU doesn't do this.
343//
344//    public void testFlagsMethod() {
345//        String baseString;
346//        Pattern pat;
347//
348//        /*
349//         * These tests are for compatibility with RI only. Logically we have to
350//         * return only flags specified during the compilation. For example
351//         * pat.flags() == 0 when we compile Pattern pat =
352//         * Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled
353//         * in a case insensitive manner. So there is little sense to do calls to
354//         * flags() now.
355//         */
356//        baseString = "(?-i)";
357//        pat = Pattern.compile(baseString);
358//
359//        baseString = "(?idmsux)abc(?-i)vg(?-dmu)";
360//        pat = Pattern.compile(baseString);
361//        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
362//
363//        baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)";
364//        pat = Pattern.compile(baseString);
365//        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
366//
367//        baseString = "(?is)a((?x)b.)";
368//        pat = Pattern.compile(baseString);
369//        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
370//
371//        baseString = "(?i)a((?-i))";
372//        pat = Pattern.compile(baseString);
373//        assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE);
374//
375//        baseString = "((?i)a)";
376//        pat = Pattern.compile(baseString);
377//        assertEquals(pat.flags(), 0);
378//
379//        pat = Pattern.compile("(?is)abc");
380//        assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
381//    }
382//END Android-removed
383
384    /*
385     * Check default flags when they are not specified in pattern. Based on RI
386     * since could not find that info
387     */
388    public void testFlagsCompileDefault() {
389        for (String pat : testPatternsAlt) {
390            try {
391                Pattern p = Pattern.compile(pat);
392                assertEquals(p.flags(), DEFAULT_FLAGS);
393            } catch (Exception e) {
394                fail("Unexpected exception: " + e);
395            }
396        }
397    }
398
399    /*
400     * Check that flags specified during compile are set properly This is a
401     * simple implementation that does not use flags combinations. Need to
402     * improve.
403     */
404    public void testFlagsCompileValid() {
405        for (String pat : testPatternsAlt) {
406            for (int flags : flagsSet) {
407                try {
408                    Pattern p = Pattern.compile(pat, flags);
409                    assertEquals(p.flags(), flags);
410                } catch (Exception e) {
411                    fail("Unexpected exception: " + e);
412                }
413            }
414        }
415    }
416
417    public void testCompileStringInt() {
418        /*
419         * these tests are needed to verify that appropriate exceptions are
420         * thrown
421         */
422        String pattern = "b)a";
423        try {
424            Pattern.compile(pattern);
425            fail("Expected a PatternSyntaxException when compiling pattern: "
426                    + pattern);
427        } catch (PatternSyntaxException e) {
428            // pass
429        }
430        pattern = "bcde)a";
431        try {
432            Pattern.compile(pattern);
433            fail("Expected a PatternSyntaxException when compiling pattern: "
434                    + pattern);
435        } catch (PatternSyntaxException e) {
436            // pass
437        }
438        pattern = "bbg())a";
439        try {
440            Pattern pat = Pattern.compile(pattern);
441            fail("Expected a PatternSyntaxException when compiling pattern: "
442                    + pattern);
443        } catch (PatternSyntaxException e) {
444            // pass
445        }
446
447        pattern = "cdb(?i))a";
448        try {
449            Pattern pat = Pattern.compile(pattern);
450            fail("Expected a PatternSyntaxException when compiling pattern: "
451                    + pattern);
452        } catch (PatternSyntaxException e) {
453            // pass
454        }
455
456        /*
457         * This pattern should compile - HARMONY-2127
458         * icu4c doesn't support canonical equivalence.
459         */
460//        pattern = "x(?c)y";
461//        Pattern.compile(pattern);
462
463        /*
464         * this pattern doesn't match any string, but should be compiled anyway
465         */
466        pattern = "(b\\1)a";
467        Pattern.compile(pattern);
468    }
469
470    /*
471     * Class under test for Pattern compile(String)
472     */
473    public void testQuantCompileNeg() {
474        String[] patterns = { "5{,2}", "{5asd", "{hgdhg", "{5,hjkh", "{,5hdsh",
475                "{5,3shdfkjh}" };
476        for (String element : patterns) {
477            try {
478                Pattern.compile(element);
479                fail("PatternSyntaxException was expected, but compilation succeeds");
480            } catch (PatternSyntaxException pse) {
481                continue;
482            }
483        }
484        // Regression for HARMONY-1365
485// BEGIN Android-changed
486// Original regex contained some illegal stuff. Changed it slightly,
487// while maintaining the wicked character of this "mother of all
488// regexes".
489//        String pattern = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\G*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\B*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]*+)|(?x-xd:^{5}+)()";
490        String pattern = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\.*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\.*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]{1,5})|(?x-xd:^{5}+)()";
491// END Android-changed
492        assertNotNull(Pattern.compile(pattern));
493    }
494
495    public void testQuantCompilePos() {
496        String[] patterns = {/* "(abc){1,3}", */"abc{2,}", "abc{5}" };
497        for (String element : patterns) {
498            Pattern.compile(element);
499        }
500    }
501
502    public void testQuantComposition() {
503        String pattern = "(a{1,3})aab";
504        java.util.regex.Pattern pat = java.util.regex.Pattern.compile(pattern);
505        java.util.regex.Matcher mat = pat.matcher("aaab");
506        mat.matches();
507        mat.start(1);
508        mat.group(1);
509    }
510
511    public void testMatches() {
512        String[][] posSeq = {
513                { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
514                { "213567", "12324567", "1234567", "213213567",
515                        "21312312312567", "444444567" },
516                { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
517                { "213234567", "3458", "0987654", "7689546432", "0398576",
518                        "98432", "5" },
519                {
520                        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
521                        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
522                                + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
523                { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
524                        "abbbAbbbliceaaa", "Alice" },
525                { "a123", "bnxnvgds156", "for", "while", "if", "struct" },
526                { "xy" }, { "xy" }, { "xcy" }
527
528        };
529
530        for (int i = 0; i < testPatterns.length; i++) {
531            for (int j = 0; j < posSeq[i].length; j++) {
532                assertTrue("Incorrect match: " + testPatterns[i] + " vs "
533                        + posSeq[i][j], Pattern.matches(testPatterns[i],
534                        posSeq[i][j]));
535            }
536        }
537    }
538
539    public void testMatchesException() {
540        /* Negative assertion test. */
541        for (String aPattern : wrongTestPatterns) {
542            try {
543                Pattern.matches(aPattern, "Foo");
544                fail("PatternSyntaxException is expected");
545            } catch (PatternSyntaxException pse) {
546                /* OKAY */
547            } catch (Exception e) {
548                fail("Unexpected exception: " + e);
549            }
550        }
551    }
552
553    public void testTimeZoneIssue() {
554        Pattern p = Pattern.compile("GMT(\\+|\\-)(\\d+)(:(\\d+))?");
555        Matcher m = p.matcher("GMT-9:45");
556        assertTrue(m.matches());
557        assertEquals("-", m.group(1));
558        assertEquals("9", m.group(2));
559        assertEquals(":45", m.group(3));
560        assertEquals("45", m.group(4));
561    }
562
563// BEGIN Android-changed
564// Removed one pattern that is buggy on the JDK. We don't want to duplicate that.
565    public void testCompileRanges() {
566        String[] correctTestPatterns = { "[^]*abb]*", /* "[^a-d[^m-p]]*abb", */
567                "[a-d\\d]*abb", "[abc]*abb", "[a-e&&[de]]*abb", "[^abc]*abb",
568                "[a-e&&[^de]]*abb", "[a-z&&[^m-p]]*abb", "[a-d[m-p]]*abb",
569                "[a-zA-Z]*abb", "[+*?]*abb", "[^+*?]*abb" };
570
571        String[] inputSecuence = { "kkkk", /* "admpabb", */ "abcabcd124654abb",
572                "abcabccbacababb", "dededededededeedabb", "gfdhfghgdfghabb",
573                "accabacbcbaabb", "acbvfgtyabb", "adbcacdbmopabcoabb",
574                "jhfkjhaSDFGHJkdfhHNJMjkhfabb", "+*??+*abb", "sdfghjkabb" };
575
576        Pattern pat;
577
578        for (int i = 0; i < correctTestPatterns.length; i++) {
579            assertTrue("pattern: " + correctTestPatterns[i] + " input: "
580                    + inputSecuence[i], Pattern.matches(correctTestPatterns[i],
581                    inputSecuence[i]));
582
583        }
584
585        String[] wrongInputSecuence = { "]", /* "admpkk", */  "abcabcd124k654abb",
586                "abwcabccbacababb", "abababdeababdeabb", "abcabcacbacbabb",
587                "acdcbecbaabb", "acbotyabb", "adbcaecdbmopabcoabb",
588                "jhfkjhaSDFGHJk;dfhHNJMjkhfabb", "+*?a?+*abb", "sdf+ghjkabb" };
589
590        for (int i = 0; i < correctTestPatterns.length; i++) {
591            assertFalse("pattern: " + correctTestPatterns[i] + " input: "
592                    + wrongInputSecuence[i], Pattern.matches(
593                    correctTestPatterns[i], wrongInputSecuence[i]));
594
595        }
596    }
597
598    public void testRangesSpecialCases() {
599        String neg_patterns[] = { "[a-&&[b-c]]", "[a-\\w]", "[b-a]", "[]" };
600
601        for (String element : neg_patterns) {
602            try {
603                Pattern.compile(element);
604                fail("PatternSyntaxException was expected: " + element);
605            } catch (PatternSyntaxException pse) {
606            }
607        }
608
609        String pos_patterns[] = { "[-]+", "----", "[a-]+", "a-a-a-a-aa--",
610                "[\\w-a]+", "123-2312--aaa-213", "[a-]]+", "-]]]]]]]]]]]]]]]" };
611
612        for (int i = 0; i < pos_patterns.length; i++) {
613            String pat = pos_patterns[i++];
614            String inp = pos_patterns[i];
615            assertTrue("pattern: " + pat + " input: " + inp, Pattern.matches(
616                    pat, inp));
617        }
618    }
619 // END Android-changed
620
621    public void testZeroSymbols() {
622        assertTrue(Pattern.matches("[\0]*abb", "\0\0\0\0\0\0abb"));
623    }
624
625    public void testEscapes() {
626        Pattern pat = Pattern.compile("\\Q{]()*?");
627        Matcher mat = pat.matcher("{]()*?");
628
629        assertTrue(mat.matches());
630    }
631
632    public void test_bug_181() {
633        Pattern.compile("[\\t-\\r]");
634    }
635
636    // https://code.google.com/p/android/issues/detail?id=40103
    /**
     * Regression test: both patterns below only have to compile without
     * throwing; nothing is matched against them.
     */
    public void test_bug_40103() {
        // Look-behind whose alternatives each end in a bounded quantifier.
        Pattern.compile("(?<!abc {1,100}|def {1,100}|ghi {1,100})jkl");

        // Looks like harmony had a similar "Bug187"...
        Pattern.compile("|(?idmsux-idmsux)|(?idmsux-idmsux)|[^|\\[-\\0274|\\,-\\\\[^|W\\}\\nq\\x65\\002\\xFE\\05\\06\\00\\x66\\x47i\\,\\xF2\\=\\06\\u0EA4\\x9B\\x3C\\f\\|\\{\\xE5\\05\\r\\u944A\\xCA\\e|\\x19\\04\\x07\\04\\u607B\\023\\0073\\x91Tr\\0150\\x83]]?(?idmsux-idmsux:\\p{Alpha}{7}?)||(?<=[^\\uEC47\\01\\02\\u3421\\a\\f\\a\\013q\\035w\\e])(?<=\\p{Punct}{0,}?)(?=^\\p{Lower})(?!\\b{8,14})(?<![|\\00-\\0146[^|\\04\\01\\04\\060\\f\\u224DO\\x1A\\xC4\\00\\02\\0315\\0351\\u84A8\\xCBt\\xCC\\06|\\0141\\00\\=\\e\\f\\x6B\\0026Tb\\040\\x76xJ&&[\\\\-\\]\\05\\07\\02\\u2DAF\\t\\x9C\\e\\0023\\02\\,X\\e|\\u6058flY\\u954C]]]{5}?)(?<=\\p{Sc}{8}+)[^|\\026-\\u89BA|o\\u6277\\t\\07\\x50&&\\p{Punct}]{8,14}+((?<=^\\p{Punct})|(?idmsux-idmsux)||(?>[\\x3E-\\]])|(?idmsux-idmsux:\\p{Punct})|(?<![\\0111\\0371\\xDF\\u6A49\\07\\u2A4D\\00\\0212\\02Xd-\\xED[^\\a-\\0061|\\0257\\04\\f\\[\\0266\\043\\03\\x2D\\042&&[^\\f-\\]&&\\s]]])|(?>[|\\n\\042\\uB09F\\06\\u0F2B\\uC96D\\x89\\uC166\\xAA|\\04-\\][^|\\a\\|\\rx\\04\\uA770\\n\\02\\t\\052\\056\\0274\\|\\=\\07\\e|\\00-\\x1D&&[^\\005\\uB15B\\uCDAC\\n\\x74\\0103\\0147\\uD91B\\n\\062G\\u9B4B\\077\\}\\0324&&[^\\0302\\,\\0221\\04\\u6D16\\04xy\\uD193\\[\\061\\06\\045\\x0F|\\e\\xBB\\f\\u1B52\\023\\u3AD2\\033\\007\\022\\}\\x66\\uA63FJ-\\0304]]]]{0,0})||(?<![^|\\0154U\\u0877\\03\\fy\\n\\|\\0147\\07-\\=[|q\\u69BE\\0243\\rp\\053\\02\\x33I\\u5E39\\u9C40\\052-\\xBC[|\\0064-\\?|\\uFC0C\\x30\\0060\\x45\\\\\\02\\?p\\xD8\\0155\\07\\0367\\04\\uF07B\\000J[^|\\0051-\\{|\\u9E4E\\u7328\\]\\u6AB8\\06\\x71\\a\\]\\e\\|KN\\u06AA\\0000\\063\\u2523&&[\\005\\0277\\x41U\\034\\}R\\u14C7\\u4767\\x09\\n\\054Ev\\0144\\<\\f\\,Q-\\xE4]]]]]{3}+)|(?>^+)|(?![^|\\|\\nJ\\t\\<\\04E\\\\\\t\\01\\\\\\02\\|\\=\\}\\xF3\\uBEC2\\032K\\014\\uCC5F\\072q\\|\\0153\\xD9\\0322\\uC6C8[^\\t\\0342\\x34\\x91\\06\\{\\xF1\\a\\u1710\\?\\xE7\\uC106\\02pF\\<&&[^|\\]\\064\\u381D\\u50CF\\eO&&[^|\\06\\x2F\\04\\045\\032\\u8536W\\0377\\0017|\\x06\\uE5FA\\05\\xD4\\020\\04c\\xFC\\02H\\x0A\\r]]]]+?)(?idmsux-idmsux)|(?<![|\\
r-\\,&&[I\\t\\r\\0201\\xDB\\e&&[^|\\02\\06\\00\\<\\a\\u7952\\064\\051\\073\\x41\\?n\\040\\0053\\031&&[\\x15-\\|]]]]{8,11}?)(?![^|\\<-\\uA74B\\xFA\\u7CD2\\024\\07n\\<\\x6A\\0042\\uE4FF\\r\\u896B\\[\\=\\042Y&&^\\p{ASCII}]++)|(?<![R-\\|&&[\\a\\0120A\\u6145\\<\\050-d[|\\e-\\uA07C|\\016-\\u80D9]]]{1,}+)|(?idmsux-idmsux)|(?idmsux-idmsux)|(?idmsux-idmsux:\\B{6,}?)|(?<=\\D{5,8}?)|(?>[\\{-\\0207|\\06-\\0276\\p{XDigit}])(?idmsux-idmsux:[^|\\x52\\0012\\]u\\xAD\\0051f\\0142\\\\l\\|\\050\\05\\f\\t\\u7B91\\r\\u7763\\{|h\\0104\\a\\f\\0234\\u2D4F&&^\\P{InGreek}]))");
    }
643
644    public void test_bug_4472() {
645        // HARMONY-4472
646        Pattern.compile("a*.+");
647    }
648
649    public void test_bug_5858() {
650        // HARMONY-5858
651        Pattern.compile("\\u6211", Pattern.LITERAL);
652    }
653
654    public void testOrphanQuantifiers() {
655        try {
656            Pattern.compile("+++++");
657            fail("PatternSyntaxException expected");
658        } catch (PatternSyntaxException pse) {
659        }
660    }
661
662    public void testOrphanQuantifiers2() {
663        try {
664            Pattern pat = Pattern.compile("\\d+*");
665            fail("PatternSyntaxException expected");
666        } catch (PatternSyntaxException pse) {
667        }
668    }
669
670    public void testBug197() {
671        Object[] vals = { ":", new Integer(2),
672                new String[] { "boo", "and:foo" }, ":", new Integer(5),
673                new String[] { "boo", "and", "foo" }, ":", new Integer(-2),
674                new String[] { "boo", "and", "foo" }, ":", new Integer(3),
675                new String[] { "boo", "and", "foo" }, ":", new Integer(1),
676                new String[] { "boo:and:foo" }, "o", new Integer(5),
677                new String[] { "b", "", ":and:f", "", "" }, "o",
678                new Integer(4), new String[] { "b", "", ":and:f", "o" }, "o",
679                new Integer(-2), new String[] { "b", "", ":and:f", "", "" },
680                "o", new Integer(0), new String[] { "b", "", ":and:f" } };
681
682        for (int i = 0; i < vals.length / 3;) {
683            String[] res = Pattern.compile(vals[i++].toString()).split(
684                    "boo:and:foo", ((Integer) vals[i++]).intValue());
685            String[] expectedRes = (String[]) vals[i++];
686
687            assertEquals(expectedRes.length, res.length);
688
689            for (int j = 0; j < expectedRes.length; j++) {
690                assertEquals(expectedRes[j], res[j]);
691            }
692        }
693    }
694
695    public void testURIPatterns() {
696        String URI_REGEXP_STR = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
697        String SCHEME_REGEXP_STR = "^[a-zA-Z]{1}[\\w+-.]+$";
698        String REL_URI_REGEXP_STR = "^(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
699        String IPV6_REGEXP_STR = "^[0-9a-fA-F\\:\\.]+(\\%\\w+)?$";
700        String IPV6_REGEXP_STR2 = "^\\[[0-9a-fA-F\\:\\.]+(\\%\\w+)?\\]$";
701        String IPV4_REGEXP_STR = "^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$";
702        String HOSTNAME_REGEXP_STR = "\\w+[\\w\\-\\.]*";
703
704        Pattern.compile(URI_REGEXP_STR);
705        Pattern.compile(REL_URI_REGEXP_STR);
706        Pattern.compile(SCHEME_REGEXP_STR);
707        Pattern.compile(IPV4_REGEXP_STR);
708        Pattern.compile(IPV6_REGEXP_STR);
709        Pattern.compile(IPV6_REGEXP_STR2);
710        Pattern.compile(HOSTNAME_REGEXP_STR);
711    }
712
713    public void testFindBoundaryCases1() {
714        Pattern pat = Pattern.compile(".*\n");
715        Matcher mat = pat.matcher("a\n");
716
717        mat.find();
718        assertEquals("a\n", mat.group());
719    }
720
721    public void testFindBoundaryCases2() {
722        Pattern pat = Pattern.compile(".*A");
723        Matcher mat = pat.matcher("aAa");
724
725        mat.find();
726        assertEquals("aA", mat.group());
727    }
728
729    public void testFindBoundaryCases3() {
730        Pattern pat = Pattern.compile(".*A");
731        Matcher mat = pat.matcher("a\naA\n");
732
733        mat.find();
734        assertEquals("aA", mat.group());
735    }
736
737    public void testFindBoundaryCases4() {
738        Pattern pat = Pattern.compile("A.*");
739        Matcher mat = pat.matcher("A\n");
740
741        mat.find();
742        assertEquals("A", mat.group());
743    }
744
745    public void testFindBoundaryCases5() {
746        Pattern pat = Pattern.compile(".*A.*");
747        Matcher mat = pat.matcher("\nA\naaa\nA\naaAaa\naaaA\n");
748        // Matcher mat = pat.matcher("\nA\n");
749        String[] res = { "A", "A", "aaAaa", "aaaA" };
750        int k = 0;
751        for (; mat.find(); k++) {
752            assertEquals(res[k], mat.group());
753        }
754    }
755
756    public void testFindBoundaryCases6() {
757        String[] res = { "", "a", "", "" };
758        Pattern pat = Pattern.compile(".*");
759        Matcher mat = pat.matcher("\na\n");
760        int k = 0;
761        for (; mat.find(); k++) {
762            assertEquals(res[k], mat.group());
763        }
764        assertEquals(4, k);
765    }
766
767    public void testBackReferences() {
768        Pattern pat = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))");
769        Matcher mat = pat.matcher("(start1: word :start1)(start2: word :start2)");
770        int k = 1;
771        for (; mat.find(); k++) {
772            assertEquals("start" + k, mat.group(2));
773            assertEquals(" word ", mat.group(3));
774            assertEquals("start" + k, mat.group(4));
775        }
776
777        assertEquals(3, k);
778        pat = Pattern.compile(".*(.)\\1");
779        mat = pat.matcher("saa");
780        assertTrue(mat.matches());
781    }
782
783    public void testNewLine() {
784        Pattern pat = Pattern.compile("(^$)*\n", Pattern.MULTILINE);
785        Matcher mat = pat.matcher("\r\n\n");
786        int counter = 0;
787        while (mat.find()) {
788            counter++;
789        }
790        assertEquals(2, counter);
791    }
792
793    public void testFindGreedy() {
794        Pattern pat = Pattern.compile(".*aaa", Pattern.DOTALL);
795        Matcher mat = pat.matcher("aaaa\naaa\naaaaaa");
796        mat.matches();
797        assertEquals(15, mat.end());
798    }
799
800    public void testSerialization() throws Exception {
801        Pattern pat = Pattern.compile("a*bc");
802        SerializableAssert comparator = new SerializableAssert() {
803            public void assertDeserialized(Serializable initial,
804                    Serializable deserialized) {
805                assertEquals(((Pattern) initial).toString(),
806                        ((Pattern) deserialized).toString());
807            }
808        };
809        SerializationTest.verifyGolden(this, pat, comparator);
810        SerializationTest.verifySelf(pat, comparator);
811    }
812
813    public void testSOLQuant() {
814        Pattern pat = Pattern.compile("$*", Pattern.MULTILINE);
815        Matcher mat = pat.matcher("\n\n");
816        int counter = 0;
817        while (mat.find()) {
818            counter++;
819        }
820
821        assertEquals(3, counter);
822    }
823
824    public void testIllegalEscape() {
825        try {
826            Pattern.compile("\\y");
827            fail("PatternSyntaxException expected");
828        } catch (PatternSyntaxException pse) {
829        }
830    }
831
832    public void testEmptyFamily() {
833        Pattern.compile("\\p{Lower}");
834    }
835
836    public void testNonCaptConstr() {
837        // Flags
838        Pattern pat = Pattern.compile("(?i)b*(?-i)a*");
839        assertTrue(pat.matcher("bBbBaaaa").matches());
840        assertFalse(pat.matcher("bBbBAaAa").matches());
841
842        // Non-capturing groups
843        pat = Pattern.compile("(?i:b*)a*");
844        assertTrue(pat.matcher("bBbBaaaa").matches());
845        assertFalse(pat.matcher("bBbBAaAa").matches());
846
847        pat = Pattern
848        // 1 2 3 4 5 6 7 8 9 10 11
849                .compile("(?:-|(-?\\d+\\d\\d\\d))?(?:-|-(\\d\\d))?(?:-|-(\\d\\d))?(T)?(?:(\\d\\d):(\\d\\d):(\\d\\d)(\\.\\d+)?)?(?:(?:((?:\\+|\\-)\\d\\d):(\\d\\d))|(Z))?");
850        Matcher mat = pat.matcher("-1234-21-31T41:51:61.789+71:81");
851        assertTrue(mat.matches());
852        assertEquals("-1234", mat.group(1));
853        assertEquals("21", mat.group(2));
854        assertEquals("31", mat.group(3));
855        assertEquals("T", mat.group(4));
856        assertEquals("41", mat.group(5));
857        assertEquals("51", mat.group(6));
858        assertEquals("61", mat.group(7));
859        assertEquals(".789", mat.group(8));
860        assertEquals("+71", mat.group(9));
861        assertEquals("81", mat.group(10));
862
863        // positive lookahead
864        pat = Pattern.compile(".*\\.(?=log$).*$");
865        assertTrue(pat.matcher("a.b.c.log").matches());
866        assertFalse(pat.matcher("a.b.c.log.").matches());
867
868        // negative lookahead
869        pat = Pattern.compile(".*\\.(?!log$).*$");
870        assertFalse(pat.matcher("abc.log").matches());
871        assertTrue(pat.matcher("abc.logg").matches());
872
873        // positive lookbehind
874        pat = Pattern.compile(".*(?<=abc)\\.log$");
875        assertFalse(pat.matcher("cde.log").matches());
876        assertTrue(pat.matcher("abc.log").matches());
877
878        // negative lookbehind
879        pat = Pattern.compile(".*(?<!abc)\\.log$");
880        assertTrue(pat.matcher("cde.log").matches());
881        assertFalse(pat.matcher("abc.log").matches());
882
883        // atomic group
884        pat = Pattern.compile("(?>a*)abb");
885        assertFalse(pat.matcher("aaabb").matches());
886        pat = Pattern.compile("(?>a*)bb");
887        assertTrue(pat.matcher("aaabb").matches());
888
889        pat = Pattern.compile("(?>a|aa)aabb");
890        assertTrue(pat.matcher("aaabb").matches());
891        pat = Pattern.compile("(?>aa|a)aabb");
892        assertFalse(pat.matcher("aaabb").matches());
893
894// BEGIN Android-removed
895// Questionable constructs that ICU doesn't support.
896//        // quantifiers over look ahead
897//        pat = Pattern.compile(".*(?<=abc)*\\.log$");
898//        assertTrue(pat.matcher("cde.log").matches());
899//        pat = Pattern.compile(".*(?<=abc)+\\.log$");
900//        assertFalse(pat.matcher("cde.log").matches());
901// END Android-removed
902
903    }
904
905    public void testCorrectReplacementBackreferencedJointSet() {
906        Pattern.compile("ab(a)*\\1");
907        Pattern.compile("abc(cd)fg");
908        Pattern.compile("aba*cd");
909        Pattern.compile("ab(a)*+cd");
910        Pattern.compile("ab(a)*?cd");
911        Pattern.compile("ab(a)+cd");
912        Pattern.compile(".*(.)\\1");
913        Pattern.compile("ab((a)|c|d)e");
914        Pattern.compile("abc((a(b))cd)");
915        Pattern.compile("ab(a)++cd");
916        Pattern.compile("ab(a)?(c)d");
917        Pattern.compile("ab(a)?+cd");
918        Pattern.compile("ab(a)??cd");
919        Pattern.compile("ab(a)??cd");
920        Pattern.compile("ab(a){1,3}?(c)d");
921    }
922
923    public void testCompilePatternWithTerminatorMark() {
924        Pattern pat = Pattern.compile("a\u0000\u0000cd");
925        Matcher mat = pat.matcher("a\u0000\u0000cd");
926        assertTrue(mat.matches());
927    }
928
929    public void testAlternations() {
930        String baseString = "|a|bc";
931        Pattern pat = Pattern.compile(baseString);
932        Matcher mat = pat.matcher("");
933
934        assertTrue(mat.matches());
935
936        baseString = "a||bc";
937        pat = Pattern.compile(baseString);
938        mat = pat.matcher("");
939        assertTrue(mat.matches());
940
941        baseString = "a|bc|";
942        pat = Pattern.compile(baseString);
943        mat = pat.matcher("");
944        assertTrue(mat.matches());
945
946        baseString = "a|b|";
947        pat = Pattern.compile(baseString);
948        mat = pat.matcher("");
949        assertTrue(mat.matches());
950
951        baseString = "a(|b|cd)e";
952        pat = Pattern.compile(baseString);
953        mat = pat.matcher("ae");
954        assertTrue(mat.matches());
955
956        baseString = "a(b||cd)e";
957        pat = Pattern.compile(baseString);
958        mat = pat.matcher("ae");
959        assertTrue(mat.matches());
960
961        baseString = "a(b|cd|)e";
962        pat = Pattern.compile(baseString);
963        mat = pat.matcher("ae");
964        assertTrue(mat.matches());
965
966        baseString = "a(b|c|)e";
967        pat = Pattern.compile(baseString);
968        mat = pat.matcher("ae");
969        assertTrue(mat.matches());
970
971        baseString = "a(|)e";
972        pat = Pattern.compile(baseString);
973        mat = pat.matcher("ae");
974        assertTrue(mat.matches());
975
976        baseString = "|";
977        pat = Pattern.compile(baseString);
978        mat = pat.matcher("");
979        assertTrue(mat.matches());
980
981        baseString = "a(?:|)e";
982        pat = Pattern.compile(baseString);
983        mat = pat.matcher("ae");
984        assertTrue(mat.matches());
985
986        baseString = "a||||bc";
987        pat = Pattern.compile(baseString);
988        mat = pat.matcher("");
989        assertTrue(mat.matches());
990
991        baseString = "(?i-is)|a";
992        pat = Pattern.compile(baseString);
993        mat = pat.matcher("a");
994        assertTrue(mat.matches());
995    }
996
997    public void testMatchWithGroups() {
998        String baseString = "jwkerhjwehrkwjehrkwjhrwkjehrjwkehrjkwhrkwehrkwhrkwrhwkhrwkjehr";
999        String pattern = ".*(..).*\\1.*";
1000        assertTrue(Pattern.compile(pattern).matcher(baseString).matches());
1001
1002        baseString = "saa";
1003        pattern = ".*(.)\\1";
1004        assertTrue(Pattern.compile(pattern).matcher(baseString).matches());
1005        assertTrue(Pattern.compile(pattern).matcher(baseString).find());
1006    }
1007
1008    public void testSplitEmptyCharSequence() {
1009        String s1 = "";
1010        String[] arr = s1.split(":");
1011        assertEquals(arr.length, 1);
1012    }
1013
1014    public void testSplitEndsWithPattern() {
1015        assertEquals(",,".split(",", 3).length, 3);
1016        assertEquals(",,".split(",", 4).length, 3);
1017
1018        assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5);
1019        assertEquals(Pattern.compile("b").split("ab", -1).length, 2);
1020    }
1021
1022    public void testCaseInsensitiveFlag() {
1023        assertTrue(Pattern.matches("(?i-:AbC)", "ABC"));
1024    }
1025
1026    public void testEmptyGroups() {
1027        Pattern pat = Pattern.compile("ab(?>)cda");
1028        Matcher mat = pat.matcher("abcda");
1029        assertTrue(mat.matches());
1030
1031        pat = Pattern.compile("ab()");
1032        mat = pat.matcher("ab");
1033        assertTrue(mat.matches());
1034
1035        pat = Pattern.compile("abc(?:)(..)");
1036        mat = pat.matcher("abcgf");
1037        assertTrue(mat.matches());
1038    }
1039
1040    public void testEmbeddedFlags() {
1041        String baseString = "(?i)((?s)a)";
1042        String testString = "A";
1043        Pattern pat = Pattern.compile(baseString);
1044        Matcher mat = pat.matcher(testString);
1045        assertTrue(mat.matches());
1046
1047        baseString = "(?x)(?i)(?s)(?d)a";
1048        testString = "A";
1049        pat = Pattern.compile(baseString);
1050        mat = pat.matcher(testString);
1051        assertTrue(mat.matches());
1052
1053        baseString = "(?x)(?i)(?s)(?d)a.";
1054        testString = "a\n";
1055        pat = Pattern.compile(baseString);
1056        mat = pat.matcher(testString);
1057        assertTrue(mat.matches());
1058
1059        baseString = "abc(?x:(?i)(?s)(?d)a.)";
1060        testString = "abcA\n";
1061        pat = Pattern.compile(baseString);
1062        mat = pat.matcher(testString);
1063        assertTrue(mat.matches());
1064
1065        baseString = "abc((?x)d)(?i)(?s)a";
1066        testString = "abcdA";
1067        pat = Pattern.compile(baseString);
1068        mat = pat.matcher(testString);
1069        assertTrue(mat.matches());
1070    }
1071
1072    public void testAltWithFlags() {
1073        Pattern.compile("|(?i-xi)|()");
1074    }
1075
1076    public void testRestoreFlagsAfterGroup() {
1077        String baseString = "abc((?x)d)   a";
1078        String testString = "abcd   a";
1079        Pattern pat = Pattern.compile(baseString);
1080        Matcher mat = pat.matcher(testString);
1081
1082        assertTrue(mat.matches());
1083    }
1084
    /*
     * Verifies that Pattern supports the java* character classes, for
     * example \p{javaLowerCase} \p{javaUpperCase} \p{javaWhitespace}
     * \p{javaMirrored}; the complete set is listed in the method body.
     */
1089    public void testCompileCharacterClass() {
1090        // Regression for HARMONY-606, 696
1091        Pattern pattern = Pattern.compile("\\p{javaLowerCase}");
1092        assertNotNull(pattern);
1093
1094        pattern = Pattern.compile("\\p{javaUpperCase}");
1095        assertNotNull(pattern);
1096
1097        pattern = Pattern.compile("\\p{javaWhitespace}");
1098        assertNotNull(pattern);
1099
1100        pattern = Pattern.compile("\\p{javaMirrored}");
1101        assertNotNull(pattern);
1102
1103        pattern = Pattern.compile("\\p{javaDefined}");
1104        assertNotNull(pattern);
1105
1106        pattern = Pattern.compile("\\p{javaDigit}");
1107        assertNotNull(pattern);
1108
1109        pattern = Pattern.compile("\\p{javaIdentifierIgnorable}");
1110        assertNotNull(pattern);
1111
1112        pattern = Pattern.compile("\\p{javaISOControl}");
1113        assertNotNull(pattern);
1114
1115        pattern = Pattern.compile("\\p{javaJavaIdentifierPart}");
1116        assertNotNull(pattern);
1117
1118        pattern = Pattern.compile("\\p{javaJavaIdentifierStart}");
1119        assertNotNull(pattern);
1120
1121        pattern = Pattern.compile("\\p{javaLetter}");
1122        assertNotNull(pattern);
1123
1124        pattern = Pattern.compile("\\p{javaLetterOrDigit}");
1125        assertNotNull(pattern);
1126
1127        pattern = Pattern.compile("\\p{javaSpaceChar}");
1128        assertNotNull(pattern);
1129
1130        pattern = Pattern.compile("\\p{javaTitleCase}");
1131        assertNotNull(pattern);
1132
1133        pattern = Pattern.compile("\\p{javaUnicodeIdentifierPart}");
1134        assertNotNull(pattern);
1135
1136        pattern = Pattern.compile("\\p{javaUnicodeIdentifierStart}");
1137        assertNotNull(pattern);
1138    }
1139
1140    public void testRangesWithSurrogatesSupplementary() {
1141        String patString = "[abc\uD8D2]";
1142        String testString = "\uD8D2";
1143        Pattern pat = Pattern.compile(patString);
1144        Matcher mat = pat.matcher(testString);
1145        assertTrue(mat.matches());
1146
1147        testString = "a";
1148        mat = pat.matcher(testString);
1149        assertTrue(mat.matches());
1150
1151        testString = "ef\uD8D2\uDD71gh";
1152        mat = pat.matcher(testString);
1153        assertFalse(mat.find());
1154
1155        testString = "ef\uD8D2gh";
1156        mat = pat.matcher(testString);
1157        assertTrue(mat.find());
1158
1159        patString = "[abc\uD8D3&&[c\uD8D3]]";
1160        testString = "c";
1161        pat = Pattern.compile(patString);
1162        mat = pat.matcher(testString);
1163        assertTrue(mat.matches());
1164
1165        testString = "a";
1166        mat = pat.matcher(testString);
1167        assertFalse(mat.matches());
1168
1169        testString = "ef\uD8D3\uDD71gh";
1170        mat = pat.matcher(testString);
1171        assertFalse(mat.find());
1172
1173        testString = "ef\uD8D3gh";
1174        mat = pat.matcher(testString);
1175        assertTrue(mat.find());
1176
1177        patString = "[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]";
1178        testString = "c";
1179        pat = Pattern.compile(patString);
1180        mat = pat.matcher(testString);
1181        assertTrue(mat.matches());
1182
1183        testString = "\uDBEE\uDF0C";
1184        mat = pat.matcher(testString);
1185        assertTrue(mat.matches());
1186
1187        testString = "ef\uD8D3\uDD71gh";
1188        mat = pat.matcher(testString);
1189        assertFalse(mat.find());
1190
1191        testString = "ef\uD8D3gh";
1192        mat = pat.matcher(testString);
1193        assertTrue(mat.find());
1194
1195        patString = "[abc\uDBFC]\uDDC2cd";
1196        testString = "\uDBFC\uDDC2cd";
1197        pat = Pattern.compile(patString);
1198        mat = pat.matcher(testString);
1199        assertFalse(mat.matches());
1200
1201        testString = "a\uDDC2cd";
1202        mat = pat.matcher(testString);
1203        assertTrue(mat.matches());
1204    }
1205
1206    public void testSequencesWithSurrogatesSupplementary() {
1207        String patString = "abcd\uD8D3";
1208        String testString = "abcd\uD8D3\uDFFC";
1209        Pattern pat = Pattern.compile(patString);
1210        Matcher mat = pat.matcher(testString);
1211// BEGIN Android-changed
1212// This one really doesn't make sense, as the above is a corrupt surrogate.
1213// Even if it's matched by the JDK, it's more of a bug than of a behavior one
1214// might want to duplicate.
1215//        assertFalse(mat.find());
1216// END Android-changed
1217
1218        testString = "abcd\uD8D3abc";
1219        mat = pat.matcher(testString);
1220        assertTrue(mat.find());
1221
1222        patString = "ab\uDBEFcd";
1223        testString = "ab\uDBEFcd";
1224        pat = Pattern.compile(patString);
1225        mat = pat.matcher(testString);
1226        assertTrue(mat.matches());
1227
1228        patString = "\uDFFCabcd";
1229        testString = "\uD8D3\uDFFCabcd";
1230        pat = Pattern.compile(patString);
1231        mat = pat.matcher(testString);
1232        assertFalse(mat.find());
1233
1234        testString = "abc\uDFFCabcdecd";
1235        mat = pat.matcher(testString);
1236        assertTrue(mat.find());
1237
1238        patString = "\uD8D3\uDFFCabcd";
1239        testString = "abc\uD8D3\uD8D3\uDFFCabcd";
1240        pat = Pattern.compile(patString);
1241        mat = pat.matcher(testString);
1242        assertTrue(mat.find());
1243    }
1244
1245    public void testPredefinedClassesWithSurrogatesSupplementary() {
1246        String patString = "[123\\D]";
1247        String testString = "a";
1248        Pattern pat = Pattern.compile(patString);
1249        Matcher mat = pat.matcher(testString);
1250        assertTrue(mat.find());
1251
1252        testString = "5";
1253        mat = pat.matcher(testString);
1254        assertFalse(mat.find());
1255
1256        testString = "3";
1257        mat = pat.matcher(testString);
1258        assertTrue(mat.find());
1259
1260        // low surrogate
1261        testString = "\uDFC4";
1262        mat = pat.matcher(testString);
1263        assertTrue(mat.find());
1264
1265        // high surrogate
1266        testString = "\uDADA";
1267        mat = pat.matcher(testString);
1268        assertTrue(mat.find());
1269
1270        testString = "\uDADA\uDFC4";
1271        mat = pat.matcher(testString);
1272        assertTrue(mat.find());
1273
1274        patString = "[123[^\\p{javaDigit}]]";
1275        testString = "a";
1276        pat = Pattern.compile(patString);
1277        mat = pat.matcher(testString);
1278        assertTrue(mat.find());
1279
1280        testString = "5";
1281        mat = pat.matcher(testString);
1282        assertFalse(mat.find());
1283
1284        testString = "3";
1285        mat = pat.matcher(testString);
1286        assertTrue(mat.find());
1287
1288        // low surrogate
1289        testString = "\uDFC4";
1290        mat = pat.matcher(testString);
1291        assertTrue(mat.find());
1292
1293        // high surrogate
1294        testString = "\uDADA";
1295        mat = pat.matcher(testString);
1296        assertTrue(mat.find());
1297
1298        testString = "\uDADA\uDFC4";
1299        mat = pat.matcher(testString);
1300        assertTrue(mat.find());
1301
1302        // surrogate characters
1303        patString = "\\p{Cs}";
1304        testString = "\uD916\uDE27";
1305        pat = Pattern.compile(patString);
1306        mat = pat.matcher(testString);
1307
1308        /*
1309         * see http://www.unicode.org/reports/tr18/#Supplementary_Characters we
1310         * have to treat text as code points not code units. \\p{Cs} matches any
1311         * surrogate character but here testString is a one code point
1312         * consisting of two code units (two surrogate characters) so we find
1313         * nothing
1314         */
1315        // assertFalse(mat.find());
1316        // swap low and high surrogates
1317        testString = "\uDE27\uD916";
1318        mat = pat.matcher(testString);
1319        assertTrue(mat.find());
1320
1321        patString = "[\uD916\uDE271\uD91623&&[^\\p{Cs}]]";
1322        testString = "1";
1323        pat = Pattern.compile(patString);
1324        mat = pat.matcher(testString);
1325        assertTrue(mat.find());
1326
1327        testString = "\uD916";
1328        pat = Pattern.compile(patString);
1329        mat = pat.matcher(testString);
1330        assertFalse(mat.find());
1331
1332        testString = "\uD916\uDE27";
1333        pat = Pattern.compile(patString);
1334        mat = pat.matcher(testString);
1335        assertTrue(mat.find());
1336
1337        // \uD9A0\uDE8E=\u7828E
1338        // \u78281=\uD9A0\uDE81
1339        patString = "[a-\uD9A0\uDE8E]";
1340        testString = "\uD9A0\uDE81";
1341        pat = Pattern.compile(patString);
1342        mat = pat.matcher(testString);
1343        assertTrue(mat.matches());
1344    }
1345
1346    public void testDotConstructionWithSurrogatesSupplementary() {
1347        String patString = ".";
1348        String testString = "\uD9A0\uDE81";
1349        Pattern pat = Pattern.compile(patString);
1350        Matcher mat = pat.matcher(testString);
1351        assertTrue(mat.matches());
1352
1353        testString = "\uDE81";
1354        mat = pat.matcher(testString);
1355        assertTrue(mat.matches());
1356
1357        testString = "\uD9A0";
1358        mat = pat.matcher(testString);
1359        assertTrue(mat.matches());
1360
1361        testString = "\n";
1362        mat = pat.matcher(testString);
1363        assertFalse(mat.matches());
1364
1365        patString = ".*\uDE81";
1366        testString = "\uD9A0\uDE81\uD9A0\uDE81\uD9A0\uDE81";
1367        pat = Pattern.compile(patString);
1368        mat = pat.matcher(testString);
1369        assertFalse(mat.matches());
1370
1371        testString = "\uD9A0\uDE81\uD9A0\uDE81\uDE81";
1372        mat = pat.matcher(testString);
1373        assertTrue(mat.matches());
1374
1375        patString = ".*";
1376        testString = "\uD9A0\uDE81\n\uD9A0\uDE81\uD9A0\n\uDE81";
1377        pat = Pattern.compile(patString, Pattern.DOTALL);
1378        mat = pat.matcher(testString);
1379        assertTrue(mat.matches());
1380    }
1381
1382    public void test_quoteLjava_lang_String() {
1383        for (String aPattern : testPatterns) {
1384            Pattern p = Pattern.compile(aPattern);
1385            try {
1386                assertEquals("quote was wrong for plain text", "\\Qtest\\E", p
1387                        .quote("test"));
1388                assertEquals("quote was wrong for text with quote sign",
1389                        "\\Q\\Qtest\\E", p.quote("\\Qtest"));
1390                assertEquals("quote was wrong for quotted text",
1391                        "\\Q\\Qtest\\E\\\\E\\Q\\E", p.quote("\\Qtest\\E"));
1392            } catch (Exception e) {
1393                fail("Unexpected exception: " + e);
1394            }
1395        }
1396    }
1397
1398    public void test_matcherLjava_lang_StringLjava_lang_CharSequence() {
1399        String[][] posSeq = {
1400                { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
1401                { "213567", "12324567", "1234567", "213213567",
1402                        "21312312312567", "444444567" },
1403                { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
1404                { "213234567", "3458", "0987654", "7689546432", "0398576",
1405                        "98432", "5" },
1406                {
1407                        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
1408                        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
1409                                + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
1410                { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
1411                        "abbbAbbbliceaaa", "Alice" },
1412                { "a123", "bnxnvgds156", "for", "while", "if", "struct" },
1413                { "xy" }, { "xy" }, { "xcy" }
1414
1415        };
1416
1417        for (int i = 0; i < testPatterns.length; i++) {
1418            for (int j = 0; j < posSeq[i].length; j++) {
1419                assertTrue("Incorrect match: " + testPatterns[i] + " vs "
1420                        + posSeq[i][j], Pattern.compile(testPatterns[i])
1421                        .matcher(posSeq[i][j]).matches());
1422            }
1423        }
1424    }
1425
1426    public void testQuantifiersWithSurrogatesSupplementary() {
1427        String patString = "\uD9A0\uDE81*abc";
1428        String testString = "\uD9A0\uDE81\uD9A0\uDE81abc";
1429        Pattern pat = Pattern.compile(patString);
1430        Matcher mat = pat.matcher(testString);
1431        assertTrue(mat.matches());
1432
1433        testString = "abc";
1434        mat = pat.matcher(testString);
1435        assertTrue(mat.matches());
1436    }
1437
1438    public void testAlternationsWithSurrogatesSupplementary() {
1439        String patString = "\uDE81|\uD9A0\uDE81|\uD9A0";
1440        String testString = "\uD9A0";
1441        Pattern pat = Pattern.compile(patString);
1442        Matcher mat = pat.matcher(testString);
1443        assertTrue(mat.matches());
1444
1445        testString = "\uDE81";
1446        mat = pat.matcher(testString);
1447        assertTrue(mat.matches());
1448
1449        testString = "\uD9A0\uDE81";
1450        mat = pat.matcher(testString);
1451        assertTrue(mat.matches());
1452
1453        testString = "\uDE81\uD9A0";
1454        mat = pat.matcher(testString);
1455        assertFalse(mat.matches());
1456    }
1457
1458    public void testGroupsWithSurrogatesSupplementary() {
1459
1460        //this pattern matches nothing
1461        String patString = "(\uD9A0)\uDE81";
1462        String testString = "\uD9A0\uDE81";
1463        Pattern pat = Pattern.compile(patString);
1464        Matcher mat = pat.matcher(testString);
1465        assertFalse(mat.matches());
1466
1467        patString = "(\uD9A0)";
1468        testString = "\uD9A0\uDE81";
1469        pat = Pattern.compile(patString, Pattern.DOTALL);
1470        mat = pat.matcher(testString);
1471        assertFalse(mat.find());
1472    }
1473
1474    /*
1475     * Regression test for HARMONY-688
1476     */
1477    public void testUnicodeCategoryWithSurrogatesSupplementary() {
1478        Pattern p = Pattern.compile("\\p{javaLowerCase}");
1479        Matcher matcher = p.matcher("\uD801\uDC28");
1480        assertTrue(matcher.find());
1481    }
1482
1483    public void testSplitEmpty() {
1484
1485        Pattern pat = Pattern.compile("");
1486        String[] s = pat.split("", -1);
1487
1488        assertEquals(1, s.length);
1489        assertEquals("", s[0]);
1490    }
1491
1492    public void testToString() {
1493        for (int i = 0; i < testPatterns.length; i++) {
1494            Pattern p = Pattern.compile(testPatterns[i]);
1495            assertEquals(testPatterns[i], p.toString());
1496        }
1497    }
1498
1499    // http://code.google.com/p/android/issues/detail?id=19308
1500    public void test_hitEnd() {
1501        Pattern p = Pattern.compile("^2(2[4-9]|3\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}$");
1502        Matcher m = p.matcher("224..");
1503        boolean isPartialMatch = !m.matches() && m.hitEnd();
1504        assertFalse(isPartialMatch);
1505    }
1506
1507    public void testCommentsInPattern() {
1508        Pattern p = Pattern.compile("ab# this is a comment\ncd", Pattern.COMMENTS);
1509        assertTrue(p.matcher("abcd").matches());
1510    }
1511
1512    public void testCompileNonCaptGroup() {
1513        // icu4c doesn't support CANON_EQ.
1514        Pattern.compile("(?:)"/*, Pattern.CANON_EQ*/);
1515        Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.DOTALL);
1516        Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.CASE_INSENSITIVE);
1517        Pattern.compile("(?:)", /*Pattern.CANON_EQ |*/ Pattern.COMMENTS | Pattern.UNIX_LINES);
1518    }
1519
1520    public void testFlagsMethod() {
1521        // icu4c doesn't count inline flags that span the entire regex as being global flags.
1522        // Android just returns those flags actually passed to Pattern.compile.
1523        if (true) {
1524            return;
1525        }
1526
1527        String baseString;
1528        Pattern pat;
1529
1530        // These tests are for compatibility with RI only. Logically we have to
1531        // return only flags specified during the compilation. For example
1532        // pat.flags() == 0 when we compile Pattern pat =
1533        // Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled
1534        // in a case insensitive manner. So there is little sense to do calls to
1535        // flags() now.
1536        baseString = "(?-i)";
1537        pat = Pattern.compile(baseString);
1538
1539        baseString = "(?idmsux)abc(?-i)vg(?-dmu)";
1540        pat = Pattern.compile(baseString);
1541        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
1542
1543        baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)";
1544        pat = Pattern.compile(baseString);
1545        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
1546
1547        baseString = "(?is)a((?x)b.)";
1548        pat = Pattern.compile(baseString);
1549        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
1550
1551        baseString = "(?i)a((?-i))";
1552        pat = Pattern.compile(baseString);
1553        assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE);
1554
1555        baseString = "((?i)a)";
1556        pat = Pattern.compile(baseString);
1557        assertEquals(pat.flags(), 0);
1558
1559        pat = Pattern.compile("(?is)abc");
1560        assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
1561    }
1562
1563    public void testCanonEqFlag() {
1564        // icu4c doesn't support CANON_EQ.
1565        if (true) {
1566            return;
1567        }
1568
1569        // for decompositions see
1570        // http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt
1571        // http://www.unicode.org/reports/tr15/#Decomposition
1572        String baseString;
1573        String testString;
1574        Pattern pat;
1575        Matcher mat;
1576
1577        baseString = "ab(a*)\\1";
1578        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1579
1580        baseString = "a(abcdf)d";
1581        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1582
1583        baseString = "aabcdfd";
1584        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1585
1586        // \u01E0 -> \u0226\u0304 ->\u0041\u0307\u0304
1587        // \u00CC -> \u0049\u0300
1588
1589        baseString = "\u01E0\u00CCcdb(ac)";
1590        testString = "\u0226\u0304\u0049\u0300cdbac";
1591        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1592        mat = pat.matcher(testString);
1593        assertTrue(mat.matches());
1594
1595        baseString = "\u01E0cdb(a\u00CCc)";
1596        testString = "\u0041\u0307\u0304cdba\u0049\u0300c";
1597        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1598        mat = pat.matcher(testString);
1599        assertTrue(mat.matches());
1600
1601        baseString = "a\u00CC";
1602        testString = "a\u0049\u0300";
1603        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1604        mat = pat.matcher(testString);
1605        assertTrue(mat.matches());
1606
1607        baseString = "\u0226\u0304cdb(ac\u0049\u0300)";
1608        testString = "\u01E0cdbac\u00CC";
1609        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1610        mat = pat.matcher(testString);
1611        assertTrue(mat.matches());
1612
1613        baseString = "cdb(?:\u0041\u0307\u0304\u00CC)";
1614        testString = "cdb\u0226\u0304\u0049\u0300";
1615        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1616        mat = pat.matcher(testString);
1617        assertTrue(mat.matches());
1618
1619        baseString = "\u01E0[a-c]\u0049\u0300cdb(ac)";
1620        testString = "\u01E0b\u00CCcdbac";
1621        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1622        mat = pat.matcher(testString);
1623        assertTrue(mat.matches());
1624
1625        baseString = "\u01E0|\u00CCcdb(ac)";
1626        testString = "\u0041\u0307\u0304";
1627        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1628        mat = pat.matcher(testString);
1629        assertTrue(mat.matches());
1630
1631        baseString = "\u00CC?cdb(ac)*(\u01E0)*[a-c]";
1632        testString = "cdb\u0041\u0307\u0304b";
1633        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1634        mat = pat.matcher(testString);
1635        assertTrue(mat.matches());
1636
1637        baseString = "a\u0300";
1638        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1639        mat = pat.matcher("a\u00E0a");
1640        assertTrue(mat.find());
1641
1642        baseString = "\u7B20\uF9F8abc";
1643        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1644        mat = pat.matcher("\uF9F8\uF9F8abc");
1645        assertTrue(mat.matches());
1646
1647        // \u01F9 -> \u006E\u0300
1648        // \u00C3 -> \u0041\u0303
1649
1650        baseString = "cdb(?:\u00C3\u006E\u0300)";
1651        testString = "cdb\u0041\u0303\u01F9";
1652        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1653        mat = pat.matcher(testString);
1654        assertTrue(mat.matches());
1655
1656        // \u014C -> \u004F\u0304
1657        // \u0163 -> \u0074\u0327
1658
1659        baseString = "cdb(?:\u0163\u004F\u0304)";
1660        testString = "cdb\u0074\u0327\u014C";
1661        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1662        mat = pat.matcher(testString);
1663        assertTrue(mat.matches());
1664
1665        // \u00E1->a\u0301
1666        // canonical ordering takes place \u0301\u0327 -> \u0327\u0301
1667
1668        baseString = "c\u0327\u0301";
1669        testString = "c\u0301\u0327";
1670        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1671        mat = pat.matcher(testString);
1672        assertTrue(mat.matches());
1673
1674        /*
1675        Hangul decompositions
1676        */
1677        // \uD4DB->\u1111\u1171\u11B6
1678        // \uD21E->\u1110\u116D\u11B5
1679        // \uD264->\u1110\u1170
1680        // not Hangul:\u0453->\u0433\u0301
1681        baseString = "a\uD4DB\u1111\u1171\u11B6\uD264";
1682        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1683
1684        baseString = "\u0453c\uD4DB";
1685        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1686
1687        baseString = "a\u1110\u116D\u11B5b\uD21Ebc";
1688        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1689
1690        baseString = "\uD4DB\uD21E\u1110\u1170cdb(ac)";
1691        testString = "\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac";
1692        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1693        mat = pat.matcher(testString);
1694        assertTrue(mat.matches());
1695
1696        baseString = "\uD4DB\uD264cdb(a\uD21Ec)";
1697        testString = "\u1111\u1171\u11B6\u1110\u1170cdba\u1110\u116D\u11B5c";
1698        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1699        mat = pat.matcher(testString);
1700        assertTrue(mat.matches());
1701
1702        baseString = "a\uD4DB";
1703        testString = "a\u1111\u1171\u11B6";
1704        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1705        mat = pat.matcher(testString);
1706        assertTrue(mat.matches());
1707
1708        baseString = "a\uD21E";
1709        testString = "a\u1110\u116D\u11B5";
1710        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1711        mat = pat.matcher(testString);
1712        assertTrue(mat.matches());
1713
1714        baseString = "\u1111\u1171\u11B6cdb(ac\u1110\u116D\u11B5)";
1715        testString = "\uD4DBcdbac\uD21E";
1716        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1717        mat = pat.matcher(testString);
1718        assertTrue(mat.matches());
1719
1720        baseString = "cdb(?:\u1111\u1171\u11B6\uD21E)";
1721        testString = "cdb\uD4DB\u1110\u116D\u11B5";
1722        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1723        mat = pat.matcher(testString);
1724        assertTrue(mat.matches());
1725
1726        baseString = "\uD4DB[a-c]\u1110\u116D\u11B5cdb(ac)";
1727        testString = "\uD4DBb\uD21Ecdbac";
1728        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1729        mat = pat.matcher(testString);
1730        assertTrue(mat.matches());
1731
1732        baseString = "\uD4DB|\u00CCcdb(ac)";
1733        testString = "\u1111\u1171\u11B6";
1734        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1735        mat = pat.matcher(testString);
1736        assertTrue(mat.matches());
1737
1738        baseString = "\uD4DB|\u00CCcdb(ac)";
1739        testString = "\u1111\u1171";
1740        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1741        mat = pat.matcher(testString);
1742        assertFalse(mat.matches());
1743
1744        baseString = "\u00CC?cdb(ac)*(\uD4DB)*[a-c]";
1745        testString = "cdb\u1111\u1171\u11B6b";
1746        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1747        mat = pat.matcher(testString);
1748        assertTrue(mat.matches());
1749
1750        baseString = "\uD4DB";
1751        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1752        mat = pat.matcher("a\u1111\u1171\u11B6a");
1753        assertTrue(mat.find());
1754
1755        baseString = "\u1111";
1756        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1757        mat = pat.matcher("bcda\uD4DBr");
1758        assertFalse(mat.find());
1759    }
1760
1761    public void testIndexesCanonicalEq() {
1762        // icu4c doesn't support CANON_EQ.
1763        if (true) {
1764            return;
1765        }
1766
1767        String baseString;
1768        String testString;
1769        Pattern pat;
1770        Matcher mat;
1771
1772        baseString = "\uD4DB";
1773        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1774        mat = pat.matcher("bcda\u1111\u1171\u11B6awr");
1775        assertTrue(mat.find());
1776        assertEquals(mat.start(), 4);
1777        assertEquals(mat.end(), 7);
1778
1779        baseString = "\uD4DB\u1111\u1171\u11B6";
1780        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1781        mat = pat.matcher("bcda\u1111\u1171\u11B6\uD4DBawr");
1782        assertTrue(mat.find());
1783        assertEquals(mat.start(), 4);
1784        assertEquals(mat.end(), 8);
1785
1786        baseString = "\uD4DB\uD21E\u1110\u1170";
1787        testString = "abcabc\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac";
1788        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1789        mat = pat.matcher(testString);
1790        assertTrue(mat.find());
1791        assertEquals(mat.start(), 6);
1792        assertEquals(mat.end(), 13);
1793    }
1794
1795    public void testCanonEqFlagWithSupplementaryCharacters() {
1796        // icu4c doesn't support CANON_EQ.
1797        if (true) {
1798            return;
1799        }
1800
1801        /*
1802        \u1D1BF->\u1D1BB\u1D16F->\u1D1B9\u1D165\u1D16F in UTF32
1803        \uD834\uDDBF->\uD834\uDDBB\uD834\uDD6F
1804        ->\uD834\uDDB9\uD834\uDD65\uD834\uDD6F in UTF16
1805        */
1806        String patString = "abc\uD834\uDDBFef";
1807        String testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
1808        Pattern pat = Pattern.compile(patString, Pattern.CANON_EQ);
1809        Matcher mat = pat.matcher(testString);
1810        assertTrue(mat.matches());
1811
1812        testString = "abc\uD834\uDDBB\uD834\uDD6Fef";
1813        mat = pat.matcher(testString);
1814        assertTrue(mat.matches());
1815
1816        patString = "abc\uD834\uDDBB\uD834\uDD6Fef";
1817        testString = "abc\uD834\uDDBFef";
1818        pat = Pattern.compile(patString, Pattern.CANON_EQ);
1819        mat = pat.matcher(testString);
1820        assertTrue(mat.matches());
1821
1822        testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
1823        mat = pat.matcher(testString);
1824        assertTrue(mat.matches());
1825
1826        patString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
1827        testString = "abc\uD834\uDDBFef";
1828        pat = Pattern.compile(patString, Pattern.CANON_EQ);
1829        mat = pat.matcher(testString);
1830        assertTrue(mat.matches());
1831
1832        testString = "abc\uD834\uDDBB\uD834\uDD6Fef";
1833        mat = pat.matcher(testString);
1834        assertTrue(mat.matches());
1835
1836        // Test supplementary characters with no decomposition
1837        patString = "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef";
1838        testString = "a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef";
1839        pat = Pattern.compile(patString, Pattern.CANON_EQ);
1840        mat = pat.matcher(testString);
1841        assertTrue(mat.matches());
1842    }
1843
1844    public void testAsPredicate() {
1845        String[][] posSeq = {
1846                { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
1847                { "213567", "12324567", "1234567", "213213567",
1848                        "21312312312567", "444444567" },
1849                { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
1850                { "213234567", "3458", "0987654", "7689546432", "0398576",
1851                        "98432", "5" },
1852                {
1853                        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
1854                        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
1855                                + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
1856                { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
1857                        "abbbAbbbliceaaa", "Alice" },
1858                { "a123", "bnxnvgds156", "for", "while", "if", "struct" },
1859                { "xy" }, { "xy" }, { "xcy" }
1860        };
1861
1862        for (int i = 0; i < testPatterns.length; i++) {
1863            Pattern p = Pattern.compile(testPatterns[i]);
1864            for (int j = 0; j < posSeq[i].length; j++) {
1865                assertTrue(p.asPredicate().test(posSeq[i][j]));
1866            }
1867        }
1868    }
1869
1870    public void testSplitAsStream() {
1871        String s[];
1872        Pattern pat = Pattern.compile("b");
1873        s = pat.splitAsStream("abccbadfebb").toArray(String[]::new);
1874        assertEquals(s.length, 3);
1875        s = pat.splitAsStream("").toArray(String[]::new);
1876        assertEquals(s.length, 0);
1877        pat = Pattern.compile("");
1878        s = pat.splitAsStream("").toArray(String[]::new);
1879        assertEquals(s.length, 0);
1880        s = pat.splitAsStream("abccbadfe").toArray(String[]::new);
1881        assertEquals(s.length, 9);
1882    }
1883}
1884