1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package org.apache.harmony.tests.java.util.regex;
19
20import java.io.Serializable;
21import java.util.regex.Matcher;
22import java.util.regex.Pattern;
23import java.util.regex.PatternSyntaxException;
24
25import junit.framework.TestCase;
26
27import org.apache.harmony.testframework.serialization.SerializationTest;
28import org.apache.harmony.testframework.serialization.SerializationTest.SerializableAssert;
29
30@SuppressWarnings("nls")
31public class PatternTest extends TestCase {
32    String[] testPatterns = {
33            "(a|b)*abb",
34            "(1*2*3*4*)*567",
35            "(a|b|c|d)*aab",
36            "(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*",
37            "(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*",
38            "(a|b)*(a|b)*A(a|b)*lice.*",
39            "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|"
40                    + "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do",
41            "x(?c)y", "x(?cc)y", "x(?:c)y"
42
43    };
44
45    public PatternTest(String name) {
46        super(name);
47    }
48
49    public void testCommentsInPattern() {
50        Pattern p = Pattern.compile("ab# this is a comment\ncd", Pattern.COMMENTS);
51        assertTrue(p.matcher("abcd").matches());
52    }
53
54    /*
55     * Class under test for String[] split(CharSequence, int)
56     */
57    public void testSplitCharSequenceint() {
58        // splitting CharSequence which ends with pattern
59        // bug6193
60        assertEquals(",,".split(",", 3).length, 3);
61        assertEquals(",,".split(",", 4).length, 3);
62        // bug6193
63        // bug5391
64        assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5);
65        assertEquals(Pattern.compile("b").split("ab", -1).length, 2);
66        // bug5391
67        String s[];
68        Pattern pat = Pattern.compile("x");
69        s = pat.split("zxx:zzz:zxx", 10);
70        assertEquals(s.length, 5);
71        s = pat.split("zxx:zzz:zxx", 3);
72        assertEquals(s.length, 3);
73        s = pat.split("zxx:zzz:zxx", -1);
74        assertEquals(s.length, 5);
75        s = pat.split("zxx:zzz:zxx", 0);
76        assertEquals(s.length, 3);
77        // other splitting
78        // negative limit
79        pat = Pattern.compile("b");
80        s = pat.split("abccbadfebb", -1);
81        assertEquals(s.length, 5);
82        s = pat.split("", -1);
83        assertEquals(s.length, 1);
84        pat = Pattern.compile("");
85        s = pat.split("", -1);
86        assertEquals(s.length, 1);
87        s = pat.split("abccbadfe", -1);
88        assertEquals(s.length, 11);
89        // zero limit
90        pat = Pattern.compile("b");
91        s = pat.split("abccbadfebb", 0);
92        assertEquals(s.length, 3);
93        s = pat.split("", 0);
94        assertEquals(s.length, 1);
95        pat = Pattern.compile("");
96        s = pat.split("", 0);
97        assertEquals(s.length, 1);
98        s = pat.split("abccbadfe", 0);
99        assertEquals(s.length, 10);
100        // positive limit
101        pat = Pattern.compile("b");
102        s = pat.split("abccbadfebb", 12);
103        assertEquals(s.length, 5);
104        s = pat.split("", 6);
105        assertEquals(s.length, 1);
106        pat = Pattern.compile("");
107        s = pat.split("", 11);
108        assertEquals(s.length, 1);
109        s = pat.split("abccbadfe", 15);
110        assertEquals(s.length, 11);
111
112        pat = Pattern.compile("b");
113        s = pat.split("abccbadfebb", 5);
114        assertEquals(s.length, 5);
115        s = pat.split("", 1);
116        assertEquals(s.length, 1);
117        pat = Pattern.compile("");
118        s = pat.split("", 1);
119        assertEquals(s.length, 1);
120        s = pat.split("abccbadfe", 11);
121        assertEquals(s.length, 11);
122
123        pat = Pattern.compile("b");
124        s = pat.split("abccbadfebb", 3);
125        assertEquals(s.length, 3);
126        pat = Pattern.compile("");
127        s = pat.split("abccbadfe", 5);
128        assertEquals(s.length, 5);
129    }
130
131    /*
132     * Class under test for String[] split(CharSequence)
133     */
134    public void testSplitCharSequence() {
135        String s[];
136        Pattern pat = Pattern.compile("b");
137        s = pat.split("abccbadfebb");
138        assertEquals(s.length, 3);
139        s = pat.split("");
140        assertEquals(s.length, 1);
141        pat = Pattern.compile("");
142        s = pat.split("");
143        assertEquals(s.length, 1);
144        s = pat.split("abccbadfe");
145        assertEquals(s.length, 10);
146        // bug6544
147        String s1 = "";
148        String[] arr = s1.split(":");
149        assertEquals(arr.length, 1);
150        // bug6544
151    }
152
153    public void testPattern() {
154    }
155
156    public void testFlags() {
157        String baseString;
158        String testString;
159        Pattern pat;
160        Matcher mat;
161
162        baseString = "((?i)|b)a";
163        testString = "A";
164        pat = Pattern.compile(baseString);
165        mat = pat.matcher(testString);
166        assertFalse(mat.matches());
167
168        baseString = "(?i)a|b";
169        testString = "A";
170        pat = Pattern.compile(baseString);
171        mat = pat.matcher(testString);
172        assertTrue(mat.matches());
173
174        baseString = "(?i)a|b";
175        testString = "B";
176        pat = Pattern.compile(baseString);
177        mat = pat.matcher(testString);
178        assertTrue(mat.matches());
179
180        baseString = "c|(?i)a|b";
181        testString = "B";
182        pat = Pattern.compile(baseString);
183        mat = pat.matcher(testString);
184        assertTrue(mat.matches());
185
186        baseString = "(?i)a|(?s)b";
187        testString = "B";
188        pat = Pattern.compile(baseString);
189        mat = pat.matcher(testString);
190        assertTrue(mat.matches());
191
192        baseString = "(?i)a|(?-i)b";
193        testString = "B";
194        pat = Pattern.compile(baseString);
195        mat = pat.matcher(testString);
196        assertFalse(mat.matches());
197
198        baseString = "(?i)a|(?-i)c|b";
199        testString = "B";
200        pat = Pattern.compile(baseString);
201        mat = pat.matcher(testString);
202        assertFalse(mat.matches());
203
204        baseString = "(?i)a|(?-i)c|(?i)b";
205        testString = "B";
206        pat = Pattern.compile(baseString);
207        mat = pat.matcher(testString);
208        assertTrue(mat.matches());
209
210        baseString = "(?i)a|(?-i)b";
211        testString = "A";
212        pat = Pattern.compile(baseString);
213        mat = pat.matcher(testString);
214        assertTrue(mat.matches());
215
216        baseString = "((?i))a";
217        testString = "A";
218        pat = Pattern.compile(baseString);
219        mat = pat.matcher(testString);
220        assertFalse(mat.matches());
221
222        baseString = "|(?i)|a";
223        testString = "A";
224        pat = Pattern.compile(baseString);
225        mat = pat.matcher(testString);
226        assertTrue(mat.matches());
227
228        baseString = "(?i)((?s)a.)";
229        testString = "A\n";
230        pat = Pattern.compile(baseString);
231        mat = pat.matcher(testString);
232        assertTrue(mat.matches());
233
234        baseString = "(?i)((?-i)a)";
235        testString = "A";
236        pat = Pattern.compile(baseString);
237        mat = pat.matcher(testString);
238        assertFalse(mat.matches());
239
240        baseString = "(?i)(?s:a.)";
241        testString = "A\n";
242        pat = Pattern.compile(baseString);
243        mat = pat.matcher(testString);
244        assertTrue(mat.matches());
245
246        baseString = "(?i)fgh(?s:aa)";
247        testString = "fghAA";
248        pat = Pattern.compile(baseString);
249        mat = pat.matcher(testString);
250        assertTrue(mat.matches());
251
252        baseString = "(?i)((?-i))a";
253        testString = "A";
254        pat = Pattern.compile(baseString);
255        mat = pat.matcher(testString);
256        assertTrue(mat.matches());
257
258        baseString = "abc(?i)d";
259        testString = "ABCD";
260        pat = Pattern.compile(baseString);
261        mat = pat.matcher(testString);
262        assertFalse(mat.matches());
263
264        testString = "abcD";
265        mat = pat.matcher(testString);
266        assertTrue(mat.matches());
267
268        baseString = "a(?i)a(?-i)a(?i)a(?-i)a";
269        testString = "aAaAa";
270        pat = Pattern.compile(baseString);
271        mat = pat.matcher(testString);
272        assertTrue(mat.matches());
273
274        testString = "aAAAa";
275        mat = pat.matcher(testString);
276        assertFalse(mat.matches());
277    }
278
279    public void testFlagsMethod() {
280        String baseString;
281        Pattern pat;
282
283        /*
284         * These tests are for compatibility with RI only. Logically we have to
285         * return only flags specified during the compilation. For example
286         * pat.flags() == 0 when we compile Pattern pat =
287         * Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled
288         * in a case insensitive manner. So there is little sense to do calls to
289         * flags() now.
290         */
291        baseString = "(?-i)";
292        pat = Pattern.compile(baseString);
293
294        baseString = "(?idmsux)abc(?-i)vg(?-dmu)";
295        pat = Pattern.compile(baseString);
296        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
297
298        baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)";
299        pat = Pattern.compile(baseString);
300        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS);
301
302        baseString = "(?is)a((?x)b.)";
303        pat = Pattern.compile(baseString);
304        assertEquals(pat.flags(), Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
305
306        baseString = "(?i)a((?-i))";
307        pat = Pattern.compile(baseString);
308        assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE);
309
310        baseString = "((?i)a)";
311        pat = Pattern.compile(baseString);
312        assertEquals(pat.flags(), 0);
313
314        pat = Pattern.compile("(?is)abc");
315        assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
316    }
317
318    /*
319     * Class under test for Pattern compile(String, int)
320     */
321    public void testCompileStringint() {
322        /*
323         * this tests are needed to verify that appropriate exceptions are
324         * thrown
325         */
326        String pattern = "b)a";
327        try {
328            Pattern.compile(pattern);
329            fail("Expected a PatternSyntaxException when compiling pattern: "
330                    + pattern);
331        } catch (PatternSyntaxException e) {
332            // pass
333        }
334        pattern = "bcde)a";
335        try {
336            Pattern.compile(pattern);
337            fail("Expected a PatternSyntaxException when compiling pattern: "
338                    + pattern);
339        } catch (PatternSyntaxException e) {
340            // pass
341        }
342        pattern = "bbg())a";
343        try {
344            Pattern.compile(pattern);
345            fail("Expected a PatternSyntaxException when compiling pattern: "
346                    + pattern);
347        } catch (PatternSyntaxException e) {
348            // pass
349        }
350
351        pattern = "cdb(?i))a";
352        try {
353            Pattern.compile(pattern);
354            fail("Expected a PatternSyntaxException when compiling pattern: "
355                    + pattern);
356        } catch (PatternSyntaxException e) {
357            // pass
358        }
359
360        /*
361         * This pattern should compile - HARMONY-2127
362         */
363        pattern = "x(?c)y";
364        Pattern.compile(pattern);
365
366        /*
367         * this pattern doesn't match any string, but should be compiled anyway
368         */
369        pattern = "(b\\1)a";
370        Pattern.compile(pattern);
371    }
372
373    /*
374     * Class under test for Pattern compile(String)
375     */
376    public void testQuantCompileNeg() {
377        String[] patterns = { "5{,2}", "{5asd", "{hgdhg", "{5,hjkh", "{,5hdsh",
378                "{5,3shdfkjh}" };
379        for (String element : patterns) {
380            try {
381                Pattern.compile(element);
382                fail("PatternSyntaxException was expected, but compilation succeeds");
383            } catch (PatternSyntaxException pse) {
384                continue;
385            }
386        }
387        // Regression for HARMONY-1365
388        String pattern = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\G*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\B*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]*+)|(?x-xd:^{5}+)()";
389        assertNotNull(Pattern.compile(pattern));
390    }
391
392    public void testQuantCompilePos() {
393        String[] patterns = {/* "(abc){1,3}", */"abc{2,}", "abc{5}" };
394        for (String element : patterns) {
395            Pattern.compile(element);
396        }
397    }
398
399    public void testQuantComposition() {
400        String pattern = "(a{1,3})aab";
401        java.util.regex.Pattern pat = java.util.regex.Pattern.compile(pattern);
402        java.util.regex.Matcher mat = pat.matcher("aaab");
403        mat.matches();
404        mat.start(1);
405        mat.group(1);
406    }
407
408    public void testMatches() {
409        String[][] posSeq = {
410                { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" },
411                { "213567", "12324567", "1234567", "213213567",
412                        "21312312312567", "444444567" },
413                { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" },
414                { "213234567", "3458", "0987654", "7689546432", "0398576",
415                        "98432", "5" },
416                {
417                        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
418                        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
419                                + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" },
420                { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa",
421                        "abbbAbbbliceaaa", "Alice" },
422                { "a123", "bnxnvgds156", "for", "while", "if", "struct" },
423                { "xy" }, { "xy" }, { "xcy" }
424
425        };
426
427        for (int i = 0; i < testPatterns.length; i++) {
428            for (int j = 0; j < posSeq[i].length; j++) {
429                assertTrue("Incorrect match: " + testPatterns[i] + " vs "
430                        + posSeq[i][j], Pattern.matches(testPatterns[i],
431                        posSeq[i][j]));
432            }
433        }
434    }
435
436    public void testTimeZoneIssue() {
437        Pattern p = Pattern.compile("GMT(\\+|\\-)(\\d+)(:(\\d+))?");
438        Matcher m = p.matcher("GMT-9:45");
439        assertTrue(m.matches());
440        assertEquals("-", m.group(1));
441        assertEquals("9", m.group(2));
442        assertEquals(":45", m.group(3));
443        assertEquals("45", m.group(4));
444    }
445
446    public void testCompileRanges() {
447        String[] correctTestPatterns = { "[^]*abb]*", "[^a-d[^m-p]]*abb",
448                "[a-d\\d]*abb", "[abc]*abb", "[a-e&&[de]]*abb", "[^abc]*abb",
449                "[a-e&&[^de]]*abb", "[a-z&&[^m-p]]*abb", "[a-d[m-p]]*abb",
450                "[a-zA-Z]*abb", "[+*?]*abb", "[^+*?]*abb" };
451
452        String[] inputSecuence = { "kkkk", "admpabb", "abcabcd124654abb",
453                "abcabccbacababb", "dededededededeedabb", "gfdhfghgdfghabb",
454                "accabacbcbaabb", "acbvfgtyabb", "adbcacdbmopabcoabb",
455                "jhfkjhaSDFGHJkdfhHNJMjkhfabb", "+*??+*abb", "sdfghjkabb" };
456
457        for (int i = 0; i < correctTestPatterns.length; i++) {
458            assertTrue("pattern: " + correctTestPatterns[i] + " input: "
459                    + inputSecuence[i], Pattern.matches(correctTestPatterns[i],
460                    inputSecuence[i]));
461
462        }
463
464        String[] wrongInputSecuence = { "]", "admpkk", "abcabcd124k654abb",
465                "abwcabccbacababb", "abababdeababdeabb", "abcabcacbacbabb",
466                "acdcbecbaabb", "acbotyabb", "adbcaecdbmopabcoabb",
467                "jhfkjhaSDFGHJk;dfhHNJMjkhfabb", "+*?a?+*abb", "sdf+ghjkabb" };
468
469        for (int i = 0; i < correctTestPatterns.length; i++) {
470            assertFalse("pattern: " + correctTestPatterns[i] + " input: "
471                    + wrongInputSecuence[i], Pattern.matches(
472                    correctTestPatterns[i], wrongInputSecuence[i]));
473
474        }
475    }
476
477    public void testRangesSpecialCases() {
478        String neg_patterns[] = { "[a-&&[b-c]]", "[a-\\w]", "[b-a]", "[]" };
479
480        for (String element : neg_patterns) {
481            try {
482                Pattern.compile(element);
483                fail("PatternSyntaxException was expected: " + element);
484            } catch (PatternSyntaxException pse) {
485            }
486        }
487
488        String pos_patterns[] = { "[-]+", "----", "[a-]+", "a-a-a-a-aa--",
489                "[\\w-a]+", "123-2312--aaa-213", "[a-]]+", "-]]]]]]]]]]]]]]]" };
490
491        for (int i = 0; i < pos_patterns.length; i++) {
492            String pat = pos_patterns[i++];
493            String inp = pos_patterns[i];
494            assertTrue("pattern: " + pat + " input: " + inp, Pattern.matches(
495                    pat, inp));
496        }
497    }
498
499    public void testZeroSymbols() {
500        assertTrue(Pattern.matches("[\0]*abb", "\0\0\0\0\0\0abb"));
501    }
502
503    public void testEscapes() {
504        Pattern pat = Pattern.compile("\\Q{]()*?");
505        Matcher mat = pat.matcher("{]()*?");
506
507        assertTrue(mat.matches());
508    }
509
510    public void testRegressions() {
511        // Bug 181
512        Pattern.compile("[\\t-\\r]");
513
514        // HARMONY-4472
515        Pattern.compile("a*.+");
516
517        // Bug187
518        Pattern
519                .compile("|(?idmsux-idmsux)|(?idmsux-idmsux)|[^|\\[-\\0274|\\,-\\\\[^|W\\}\\nq\\x65\\002\\xFE\\05\\06\\00\\x66\\x47i\\,\\xF2\\=\\06\\u0EA4\\x9B\\x3C\\f\\|\\{\\xE5\\05\\r\\u944A\\xCA\\e|\\x19\\04\\x07\\04\\u607B\\023\\0073\\x91Tr\\0150\\x83]]?(?idmsux-idmsux:\\p{Alpha}{7}?)||(?<=[^\\uEC47\\01\\02\\u3421\\a\\f\\a\\013q\\035w\\e])(?<=\\p{Punct}{0,}?)(?=^\\p{Lower})(?!\\b{8,14})(?<![|\\00-\\0146[^|\\04\\01\\04\\060\\f\\u224DO\\x1A\\xC4\\00\\02\\0315\\0351\\u84A8\\xCBt\\xCC\\06|\\0141\\00\\=\\e\\f\\x6B\\0026Tb\\040\\x76xJ&&[\\\\-\\]\\05\\07\\02\\u2DAF\\t\\x9C\\e\\0023\\02\\,X\\e|\\u6058flY\\u954C]]]{5}?)(?<=\\p{Sc}{8}+)[^|\\026-\\u89BA|o\\u6277\\t\\07\\x50&&\\p{Punct}]{8,14}+((?<=^\\p{Punct})|(?idmsux-idmsux)||(?>[\\x3E-\\]])|(?idmsux-idmsux:\\p{Punct})|(?<![\\0111\\0371\\xDF\\u6A49\\07\\u2A4D\\00\\0212\\02Xd-\\xED[^\\a-\\0061|\\0257\\04\\f\\[\\0266\\043\\03\\x2D\\042&&[^\\f-\\]&&\\s]]])|(?>[|\\n\\042\\uB09F\\06\\u0F2B\\uC96D\\x89\\uC166\\xAA|\\04-\\][^|\\a\\|\\rx\\04\\uA770\\n\\02\\t\\052\\056\\0274\\|\\=\\07\\e|\\00-\\x1D&&[^\\005\\uB15B\\uCDAC\\n\\x74\\0103\\0147\\uD91B\\n\\062G\\u9B4B\\077\\}\\0324&&[^\\0302\\,\\0221\\04\\u6D16\\04xy\\uD193\\[\\061\\06\\045\\x0F|\\e\\xBB\\f\\u1B52\\023\\u3AD2\\033\\007\\022\\}\\x66\\uA63FJ-\\0304]]]]{0,0})||(?<![^|\\0154U\\u0877\\03\\fy\\n\\|\\0147\\07-\\=[|q\\u69BE\\0243\\rp\\053\\02\\x33I\\u5E39\\u9C40\\052-\\xBC[|\\0064-\\?|\\uFC0C\\x30\\0060\\x45\\\\\\02\\?p\\xD8\\0155\\07\\0367\\04\\uF07B\\000J[^|\\0051-\\{|\\u9E4E\\u7328\\]\\u6AB8\\06\\x71\\a\\]\\e\\|KN\\u06AA\\0000\\063\\u2523&&[\\005\\0277\\x41U\\034\\}R\\u14C7\\u4767\\x09\\n\\054Ev\\0144\\<\\f\\,Q-\\xE4]]]]]{3}+)|(?>^+)|(?![^|\\|\\nJ\\t\\<\\04E\\\\\\t\\01\\\\\\02\\|\\=\\}\\xF3\\uBEC2\\032K\\014\\uCC5F\\072q\\|\\0153\\xD9\\0322\\uC6C8[^\\t\\0342\\x34\\x91\\06\\{\\xF1\\a\\u1710\\?\\xE7\\uC106\\02pF\\<&&[^|\\]\\064\\u381D\\u50CF\\eO&&[^|\\06\\x2F\\04\\045\\032\\u8536W\\0377\\0017|\\x06\\uE5FA\\05\\xD4\\020\\04c\\xFC\\02H\\x0A\\r]]]]+?)(?idmsux-idmsux)|(?<![|\\r-\\,&&[I\\t\\r\\0201\\xDB\\e&&[^|\\02\\06\\00\\<\\a\\u7952\\064\\051\\073\\x41\\?n\\040\\0053\\031&&[\\x15-\\|]]]]{8,11}?)(?![^|\\<-\\uA74B\\xFA\\u7CD2\\024\\07n\\<\\x6A\\0042\\uE4FF\\r\\u896B\\[\\=\\042Y&&^\\p{ASCII}]++)|(?<![R-\\|&&[\\a\\0120A\\u6145\\<\\050-d[|\\e-\\uA07C|\\016-\\u80D9]]]{1,}+)|(?idmsux-idmsux)|(?idmsux-idmsux)|(?idmsux-idmsux:\\B{6,}?)|(?<=\\D{5,8}?)|(?>[\\{-\\0207|\\06-\\0276\\p{XDigit}])(?idmsux-idmsux:[^|\\x52\\0012\\]u\\xAD\\0051f\\0142\\\\l\\|\\050\\05\\f\\t\\u7B91\\r\\u7763\\{|h\\0104\\a\\f\\0234\\u2D4F&&^\\P{InGreek}]))");
520        // HARMONY-5858
521        Pattern.compile("\\u6211", Pattern.LITERAL);
522    }
523
524    public void testOrphanQuantifiers() {
525        try {
526            Pattern.compile("+++++");
527            fail("PatternSyntaxException expected");
528        } catch (PatternSyntaxException pse) {
529        }
530    }
531
532    public void testOrphanQuantifiers2() {
533        try {
534            Pattern.compile("\\d+*");
535            fail("PatternSyntaxException expected");
536        } catch (PatternSyntaxException pse) {
537        }
538    }
539
540    public void testBug197() {
541        Object[] vals = { ":", new Integer(2),
542                new String[] { "boo", "and:foo" }, ":", new Integer(5),
543                new String[] { "boo", "and", "foo" }, ":", new Integer(-2),
544                new String[] { "boo", "and", "foo" }, ":", new Integer(3),
545                new String[] { "boo", "and", "foo" }, ":", new Integer(1),
546                new String[] { "boo:and:foo" }, "o", new Integer(5),
547                new String[] { "b", "", ":and:f", "", "" }, "o",
548                new Integer(4), new String[] { "b", "", ":and:f", "o" }, "o",
549                new Integer(-2), new String[] { "b", "", ":and:f", "", "" },
550                "o", new Integer(0), new String[] { "b", "", ":and:f" } };
551
552        for (int i = 0; i < vals.length / 3;) {
553            String[] res = Pattern.compile(vals[i++].toString()).split(
554                    "boo:and:foo", ((Integer) vals[i++]).intValue());
555            String[] expectedRes = (String[]) vals[i++];
556
557            assertEquals(expectedRes.length, res.length);
558
559            for (int j = 0; j < expectedRes.length; j++) {
560                assertEquals(expectedRes[j], res[j]);
561            }
562        }
563    }
564
565    public void testURIPatterns() {
566        String URI_REGEXP_STR = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
567        String SCHEME_REGEXP_STR = "^[a-zA-Z]{1}[\\w+-.]+$";
568        String REL_URI_REGEXP_STR = "^(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
569        String IPV6_REGEXP_STR = "^[0-9a-fA-F\\:\\.]+(\\%\\w+)?$";
570        String IPV6_REGEXP_STR2 = "^\\[[0-9a-fA-F\\:\\.]+(\\%\\w+)?\\]$";
571        String IPV4_REGEXP_STR = "^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$";
572        String HOSTNAME_REGEXP_STR = "\\w+[\\w\\-\\.]*";
573
574        Pattern.compile(URI_REGEXP_STR);
575        Pattern.compile(REL_URI_REGEXP_STR);
576        Pattern.compile(SCHEME_REGEXP_STR);
577        Pattern.compile(IPV4_REGEXP_STR);
578        Pattern.compile(IPV6_REGEXP_STR);
579        Pattern.compile(IPV6_REGEXP_STR2);
580        Pattern.compile(HOSTNAME_REGEXP_STR);
581    }
582
583    public void testFindBoundaryCases1() {
584        Pattern pat = Pattern.compile(".*\n");
585        Matcher mat = pat.matcher("a\n");
586
587        mat.find();
588        assertEquals("a\n", mat.group());
589
590    }
591
592    public void testFindBoundaryCases2() {
593        Pattern pat = Pattern.compile(".*A");
594        Matcher mat = pat.matcher("aAa");
595
596        mat.find();
597        assertEquals("aA", mat.group());
598
599    }
600
601    public void testFindBoundaryCases3() {
602        Pattern pat = Pattern.compile(".*A");
603        Matcher mat = pat.matcher("a\naA\n");
604
605        mat.find();
606        assertEquals("aA", mat.group());
607
608    }
609
610    public void testFindBoundaryCases4() {
611        Pattern pat = Pattern.compile("A.*");
612        Matcher mat = pat.matcher("A\n");
613
614        mat.find();
615        assertEquals("A", mat.group());
616
617    }
618
619    public void testFindBoundaryCases5() {
620        Pattern pat = Pattern.compile(".*A.*");
621        Matcher mat = pat.matcher("\nA\naaa\nA\naaAaa\naaaA\n");
622        // Matcher mat = pat.matcher("\nA\n");
623        String[] res = { "A", "A", "aaAaa", "aaaA" };
624        int k = 0;
625        for (; mat.find(); k++) {
626            assertEquals(res[k], mat.group());
627        }
628    }
629
630    public void testFindBoundaryCases6() {
631        String[] res = { "", "a", "", "" };
632        Pattern pat = Pattern.compile(".*");
633        Matcher mat = pat.matcher("\na\n");
634        int k = 0;
635
636        for (; mat.find(); k++) {
637            assertEquals(res[k], mat.group());
638        }
639    }
640
641    public void _testFindBoundaryCases7() {
642        Pattern pat = Pattern.compile(".*");
643        Matcher mat = pat.matcher("\na\n");
644        int k = 0;
645
646        for (; mat.find(); k++) {
647            System.out.println(mat.group());
648            System.out.flush();
649        }
650    }
651
652    public void testBackReferences() {
653        Pattern pat = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))");
654        Matcher mat = pat
655                .matcher("(start1: word :start1)(start2: word :start2)");
656        int k = 1;
657        for (; mat.find(); k++) {
658            assertEquals("start" + k, mat.group(2));
659            assertEquals(" word ", mat.group(3));
660            assertEquals("start" + k, mat.group(4));
661
662        }
663
664        assertEquals(3, k);
665        pat = Pattern.compile(".*(.)\\1");
666        mat = pat.matcher("saa");
667        assertTrue(mat.matches());
668    }
669
670    public void _testBackReferences1() {
671        Pattern pat = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))");
672        Matcher mat = pat
673                .matcher("(start1: word :start1)(start2: word :start2)");
674        int k = 1;
675        for (; mat.find(); k++) {
676            System.out.println(mat.group(2));
677            System.out.println(mat.group(3));
678            System.out.println(mat.group(4));
679
680        }
681
682        assertEquals(3, k);
683    }
684
685    public void testNewLine() {
686        Pattern pat = Pattern.compile("(^$)*\n", Pattern.MULTILINE);
687        Matcher mat = pat.matcher("\r\n\n");
688        int counter = 0;
689        while (mat.find()) {
690            counter++;
691        }
692        assertEquals(2, counter);
693    }
694
695    public void testFindGreedy() {
696        Pattern pat = Pattern.compile(".*aaa", Pattern.DOTALL);
697        Matcher mat = pat.matcher("aaaa\naaa\naaaaaa");
698        mat.matches();
699        assertEquals(15, mat.end());
700    }
701
702    public void testSerialization() throws Exception {
703        Pattern pat = Pattern.compile("a*bc");
704        SerializableAssert comparator = new SerializableAssert() {
705            public void assertDeserialized(Serializable initial,
706                    Serializable deserialized) {
707                assertEquals(((Pattern) initial).toString(),
708                        ((Pattern) deserialized).toString());
709            }
710        };
711        SerializationTest.verifyGolden(this, pat, comparator);
712        SerializationTest.verifySelf(pat, comparator);
713    }
714
715    public void testSOLQuant() {
716        Pattern pat = Pattern.compile("$*", Pattern.MULTILINE);
717        Matcher mat = pat.matcher("\n\n");
718        int counter = 0;
719        while (mat.find()) {
720            counter++;
721        }
722
723        assertEquals(3, counter);
724    }
725
726    public void testIllegalEscape() {
727        try {
728            Pattern.compile("\\y");
729            fail("PatternSyntaxException expected");
730        } catch (PatternSyntaxException pse) {
731        }
732    }
733
734    public void testEmptyFamily() {
735        Pattern.compile("\\p{Lower}");
736    }
737
738    public void testNonCaptConstr() {
739        // Flags
740        Pattern pat = Pattern.compile("(?i)b*(?-i)a*");
741        assertTrue(pat.matcher("bBbBaaaa").matches());
742        assertFalse(pat.matcher("bBbBAaAa").matches());
743
744        // Non-capturing groups
745        pat = Pattern.compile("(?i:b*)a*");
746        assertTrue(pat.matcher("bBbBaaaa").matches());
747        assertFalse(pat.matcher("bBbBAaAa").matches());
748
749        pat = Pattern
750        // 1 2 3 4 5 6 7 8 9 10 11
751                .compile("(?:-|(-?\\d+\\d\\d\\d))?(?:-|-(\\d\\d))?(?:-|-(\\d\\d))?(T)?(?:(\\d\\d):(\\d\\d):(\\d\\d)(\\.\\d+)?)?(?:(?:((?:\\+|\\-)\\d\\d):(\\d\\d))|(Z))?");
752        Matcher mat = pat.matcher("-1234-21-31T41:51:61.789+71:81");
753        assertTrue(mat.matches());
754        assertEquals("-1234", mat.group(1));
755        assertEquals("21", mat.group(2));
756        assertEquals("31", mat.group(3));
757        assertEquals("T", mat.group(4));
758        assertEquals("41", mat.group(5));
759        assertEquals("51", mat.group(6));
760        assertEquals("61", mat.group(7));
761        assertEquals(".789", mat.group(8));
762        assertEquals("+71", mat.group(9));
763        assertEquals("81", mat.group(10));
764
765        // positive lookahead
766        pat = Pattern.compile(".*\\.(?=log$).*$");
767        assertTrue(pat.matcher("a.b.c.log").matches());
768        assertFalse(pat.matcher("a.b.c.log.").matches());
769
770        // negative lookahead
771        pat = Pattern.compile(".*\\.(?!log$).*$");
772        assertFalse(pat.matcher("abc.log").matches());
773        assertTrue(pat.matcher("abc.logg").matches());
774
775        // positive lookbehind
776        pat = Pattern.compile(".*(?<=abc)\\.log$");
777        assertFalse(pat.matcher("cde.log").matches());
778        assertTrue(pat.matcher("abc.log").matches());
779
780        // negative lookbehind
781        pat = Pattern.compile(".*(?<!abc)\\.log$");
782        assertTrue(pat.matcher("cde.log").matches());
783        assertFalse(pat.matcher("abc.log").matches());
784
785        // atomic group
786        pat = Pattern.compile("(?>a*)abb");
787        assertFalse(pat.matcher("aaabb").matches());
788        pat = Pattern.compile("(?>a*)bb");
789        assertTrue(pat.matcher("aaabb").matches());
790
791        pat = Pattern.compile("(?>a|aa)aabb");
792        assertTrue(pat.matcher("aaabb").matches());
793        pat = Pattern.compile("(?>aa|a)aabb");
794        assertFalse(pat.matcher("aaabb").matches());
795
796        // quantifiers over look ahead
797        pat = Pattern.compile(".*(?<=abc)*\\.log$");
798        assertTrue(pat.matcher("cde.log").matches());
799        pat = Pattern.compile(".*(?<=abc)+\\.log$");
800        assertFalse(pat.matcher("cde.log").matches());
801
802    }
803
804    public void _testCorrectReplacementBackreferencedJointSet() {
805        Pattern.compile("ab(a)*\\1");
806        Pattern.compile("abc(cd)fg");
807        Pattern.compile("aba*cd");
808        Pattern.compile("ab(a)*+cd");
809        Pattern.compile("ab(a)*?cd");
810        Pattern.compile("ab(a)+cd");
811        Pattern.compile(".*(.)\\1");
812        Pattern.compile("ab((a)|c|d)e");
813        Pattern.compile("abc((a(b))cd)");
814        Pattern.compile("ab(a)++cd");
815        Pattern.compile("ab(a)?(c)d");
816        Pattern.compile("ab(a)?+cd");
817        Pattern.compile("ab(a)??cd");
818        Pattern.compile("ab(a)??cd");
819        Pattern.compile("ab(a){1,3}?(c)d");
820    }
821
822    public void testCompilePatternWithTerminatorMark() {
823        Pattern pat = Pattern.compile("a\u0000\u0000cd");
824        Matcher mat = pat.matcher("a\u0000\u0000cd");
825        assertTrue(mat.matches());
826    }
827
828    public void testAlternations() {
829        String baseString = "|a|bc";
830        Pattern pat = Pattern.compile(baseString);
831        Matcher mat = pat.matcher("");
832
833        assertTrue(mat.matches());
834
835        baseString = "a||bc";
836        pat = Pattern.compile(baseString);
837        mat = pat.matcher("");
838        assertTrue(mat.matches());
839
840        baseString = "a|bc|";
841        pat = Pattern.compile(baseString);
842        mat = pat.matcher("");
843        assertTrue(mat.matches());
844
845        baseString = "a|b|";
846        pat = Pattern.compile(baseString);
847        mat = pat.matcher("");
848        assertTrue(mat.matches());
849
850        baseString = "a(|b|cd)e";
851        pat = Pattern.compile(baseString);
852        mat = pat.matcher("ae");
853        assertTrue(mat.matches());
854
855        baseString = "a(b||cd)e";
856        pat = Pattern.compile(baseString);
857        mat = pat.matcher("ae");
858        assertTrue(mat.matches());
859
860        baseString = "a(b|cd|)e";
861        pat = Pattern.compile(baseString);
862        mat = pat.matcher("ae");
863        assertTrue(mat.matches());
864
865        baseString = "a(b|c|)e";
866        pat = Pattern.compile(baseString);
867        mat = pat.matcher("ae");
868        assertTrue(mat.matches());
869
870        baseString = "a(|)e";
871        pat = Pattern.compile(baseString);
872        mat = pat.matcher("ae");
873        assertTrue(mat.matches());
874
875        baseString = "|";
876        pat = Pattern.compile(baseString);
877        mat = pat.matcher("");
878        assertTrue(mat.matches());
879
880        baseString = "a(?:|)e";
881        pat = Pattern.compile(baseString);
882        mat = pat.matcher("ae");
883        assertTrue(mat.matches());
884
885        baseString = "a||||bc";
886        pat = Pattern.compile(baseString);
887        mat = pat.matcher("");
888        assertTrue(mat.matches());
889
890        baseString = "(?i-is)|a";
891        pat = Pattern.compile(baseString);
892        mat = pat.matcher("a");
893        assertTrue(mat.matches());
894    }
895
896    public void testMatchWithGroups() {
897        String baseString = "jwkerhjwehrkwjehrkwjhrwkjehrjwkehrjkwhrkwehrkwhrkwrhwkhrwkjehr";
898        String pattern = ".*(..).*\\1.*";
899        assertTrue(Pattern.compile(pattern).matcher(baseString).matches());
900
901        baseString = "saa";
902        pattern = ".*(.)\\1";
903        assertTrue(Pattern.compile(pattern).matcher(baseString).matches());
904        assertTrue(Pattern.compile(pattern).matcher(baseString).find());
905    }
906
907    public void testSplitEmptyCharSequence() {
908        String s1 = "";
909        String[] arr = s1.split(":");
910        assertEquals(arr.length, 1);
911    }
912
913    public void testSplitEndsWithPattern() {
914        assertEquals(",,".split(",", 3).length, 3);
915        assertEquals(",,".split(",", 4).length, 3);
916
917        assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5);
918        assertEquals(Pattern.compile("b").split("ab", -1).length, 2);
919    }
920
921    public void testCaseInsensitiveFlag() {
922        assertTrue(Pattern.matches("(?i-:AbC)", "ABC"));
923    }
924
925    public void testEmptyGroups() {
926        Pattern pat = Pattern.compile("ab(?>)cda");
927        Matcher mat = pat.matcher("abcda");
928        assertTrue(mat.matches());
929
930        pat = Pattern.compile("ab()");
931        mat = pat.matcher("ab");
932        assertTrue(mat.matches());
933
934        pat = Pattern.compile("abc(?:)(..)");
935        mat = pat.matcher("abcgf");
936        assertTrue(mat.matches());
937    }
938
939    public void testCompileNonCaptGroup() {
940        boolean isCompiled = false;
941
942        try {
943            Pattern.compile("(?:)", Pattern.CANON_EQ);
944            Pattern.compile("(?:)", Pattern.CANON_EQ | Pattern.DOTALL);
945            Pattern
946                    .compile("(?:)", Pattern.CANON_EQ
947                            | Pattern.CASE_INSENSITIVE);
948            Pattern.compile("(?:)", Pattern.CANON_EQ | Pattern.COMMENTS
949                    | Pattern.UNIX_LINES);
950            isCompiled = true;
951        } catch (PatternSyntaxException e) {
952            System.out.println(e);
953        }
954        assertTrue(isCompiled);
955    }
956
957    public void testEmbeddedFlags() {
958        String baseString = "(?i)((?s)a)";
959        String testString = "A";
960        Pattern pat = Pattern.compile(baseString);
961        Matcher mat = pat.matcher(testString);
962        assertTrue(mat.matches());
963
964        baseString = "(?x)(?i)(?s)(?d)a";
965        testString = "A";
966        pat = Pattern.compile(baseString);
967        mat = pat.matcher(testString);
968        assertTrue(mat.matches());
969
970        baseString = "(?x)(?i)(?s)(?d)a.";
971        testString = "a\n";
972        pat = Pattern.compile(baseString);
973        mat = pat.matcher(testString);
974        assertTrue(mat.matches());
975
976        baseString = "abc(?x:(?i)(?s)(?d)a.)";
977        testString = "abcA\n";
978        pat = Pattern.compile(baseString);
979        mat = pat.matcher(testString);
980        assertTrue(mat.matches());
981
982        baseString = "abc((?x)d)(?i)(?s)a";
983        testString = "abcdA";
984        pat = Pattern.compile(baseString);
985        mat = pat.matcher(testString);
986        assertTrue(mat.matches());
987    }
988
989    public void testAltWithFlags() {
990        Pattern.compile("|(?i-xi)|()");
991    }
992
993    public void testRestoreFlagsAfterGroup() {
994        String baseString = "abc((?x)d)   a";
995        String testString = "abcd   a";
996        Pattern pat = Pattern.compile(baseString);
997        Matcher mat = pat.matcher(testString);
998
999        assertTrue(mat.matches());
1000    }
1001
1002    /*
1003     * Verify if the Pattern support the following character classes:
1004     * \p{javaLowerCase} \p{javaUpperCase} \p{javaWhitespace} \p{javaMirrored}
1005     */
1006    public void testCompileCharacterClass() {
1007        // Regression for HARMONY-606, 696
1008        Pattern pattern = Pattern.compile("\\p{javaLowerCase}");
1009        assertNotNull(pattern);
1010
1011        pattern = Pattern.compile("\\p{javaUpperCase}");
1012        assertNotNull(pattern);
1013
1014        pattern = Pattern.compile("\\p{javaWhitespace}");
1015        assertNotNull(pattern);
1016
1017        pattern = Pattern.compile("\\p{javaMirrored}");
1018        assertNotNull(pattern);
1019
1020        pattern = Pattern.compile("\\p{javaDefined}");
1021        assertNotNull(pattern);
1022
1023        pattern = Pattern.compile("\\p{javaDigit}");
1024        assertNotNull(pattern);
1025
1026        pattern = Pattern.compile("\\p{javaIdentifierIgnorable}");
1027        assertNotNull(pattern);
1028
1029        pattern = Pattern.compile("\\p{javaISOControl}");
1030        assertNotNull(pattern);
1031
1032        pattern = Pattern.compile("\\p{javaJavaIdentifierPart}");
1033        assertNotNull(pattern);
1034
1035        pattern = Pattern.compile("\\p{javaJavaIdentifierStart}");
1036        assertNotNull(pattern);
1037
1038        pattern = Pattern.compile("\\p{javaLetter}");
1039        assertNotNull(pattern);
1040
1041        pattern = Pattern.compile("\\p{javaLetterOrDigit}");
1042        assertNotNull(pattern);
1043
1044        pattern = Pattern.compile("\\p{javaSpaceChar}");
1045        assertNotNull(pattern);
1046
1047        pattern = Pattern.compile("\\p{javaTitleCase}");
1048        assertNotNull(pattern);
1049
1050        pattern = Pattern.compile("\\p{javaUnicodeIdentifierPart}");
1051        assertNotNull(pattern);
1052
1053        pattern = Pattern.compile("\\p{javaUnicodeIdentifierStart}");
1054        assertNotNull(pattern);
1055    }
1056
1057    public void testCanonEqFlag() {
1058
1059        /*
1060         * for decompositions see
1061         * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt
1062         * http://www.unicode.org/reports/tr15/#Decomposition
1063         */
1064        String baseString;
1065        String testString;
1066        Pattern pat;
1067        Matcher mat;
1068
1069        baseString = "ab(a*)\\1";
1070        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1071
1072        baseString = "a(abcdf)d";
1073        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1074
1075        baseString = "aabcdfd";
1076        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1077
1078        // \u01E0 -> \u0226\u0304 ->\u0041\u0307\u0304
1079        // \u00CC -> \u0049\u0300
1080
1081        baseString = "\u01E0\u00CCcdb(ac)";
1082        testString = "\u0226\u0304\u0049\u0300cdbac";
1083        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1084        mat = pat.matcher(testString);
1085        assertTrue(mat.matches());
1086
1087        baseString = "\u01E0cdb(a\u00CCc)";
1088        testString = "\u0041\u0307\u0304cdba\u0049\u0300c";
1089        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1090        mat = pat.matcher(testString);
1091        assertTrue(mat.matches());
1092
1093        baseString = "a\u00CC";
1094        testString = "a\u0049\u0300";
1095        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1096        mat = pat.matcher(testString);
1097        assertTrue(mat.matches());
1098
1099        baseString = "\u0226\u0304cdb(ac\u0049\u0300)";
1100        testString = "\u01E0cdbac\u00CC";
1101        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1102        mat = pat.matcher(testString);
1103        assertTrue(mat.matches());
1104
1105        baseString = "cdb(?:\u0041\u0307\u0304\u00CC)";
1106        testString = "cdb\u0226\u0304\u0049\u0300";
1107        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1108        mat = pat.matcher(testString);
1109        assertTrue(mat.matches());
1110
1111        baseString = "\u01E0[a-c]\u0049\u0300cdb(ac)";
1112        testString = "\u01E0b\u00CCcdbac";
1113        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1114        mat = pat.matcher(testString);
1115        assertTrue(mat.matches());
1116
1117        baseString = "\u01E0|\u00CCcdb(ac)";
1118        testString = "\u0041\u0307\u0304";
1119        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1120        mat = pat.matcher(testString);
1121        assertTrue(mat.matches());
1122
1123        baseString = "\u00CC?cdb(ac)*(\u01E0)*[a-c]";
1124        testString = "cdb\u0041\u0307\u0304b";
1125        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1126        mat = pat.matcher(testString);
1127        assertTrue(mat.matches());
1128
1129        baseString = "a\u0300";
1130        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1131        mat = pat.matcher("a\u00E0a");
1132        assertTrue(mat.find());
1133
1134        baseString = "\u7B20\uF9F8abc";
1135        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1136        mat = pat.matcher("\uF9F8\uF9F8abc");
1137        assertTrue(mat.matches());
1138
1139        // \u01F9 -> \u006E\u0300
1140        // \u00C3 -> \u0041\u0303
1141
1142        baseString = "cdb(?:\u00C3\u006E\u0300)";
1143        testString = "cdb\u0041\u0303\u01F9";
1144        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1145        mat = pat.matcher(testString);
1146        assertTrue(mat.matches());
1147
1148        // \u014C -> \u004F\u0304
1149        // \u0163 -> \u0074\u0327
1150
1151        baseString = "cdb(?:\u0163\u004F\u0304)";
1152        testString = "cdb\u0074\u0327\u014C";
1153        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1154        mat = pat.matcher(testString);
1155        assertTrue(mat.matches());
1156
1157        // \u00E1->a\u0301
1158        // canonical ordering takes place \u0301\u0327 -> \u0327\u0301
1159
1160        baseString = "c\u0327\u0301";
1161        testString = "c\u0301\u0327";
1162        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1163        mat = pat.matcher(testString);
1164        assertTrue(mat.matches());
1165
1166        /*
1167         * Hangul decompositions
1168         */
1169        // \uD4DB->\u1111\u1171\u11B6
1170        // \uD21E->\u1110\u116D\u11B5
1171        // \uD264->\u1110\u1170
1172        // not Hangul:\u0453->\u0433\u0301
1173        baseString = "a\uD4DB\u1111\u1171\u11B6\uD264";
1174        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1175
1176        baseString = "\u0453c\uD4DB";
1177        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1178
1179        baseString = "a\u1110\u116D\u11B5b\uD21Ebc";
1180        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1181
1182        baseString = "\uD4DB\uD21E\u1110\u1170cdb(ac)";
1183        testString = "\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac";
1184        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1185        mat = pat.matcher(testString);
1186        assertTrue(mat.matches());
1187
1188        baseString = "\uD4DB\uD264cdb(a\uD21Ec)";
1189        testString = "\u1111\u1171\u11B6\u1110\u1170cdba\u1110\u116D\u11B5c";
1190        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1191        mat = pat.matcher(testString);
1192        assertTrue(mat.matches());
1193
1194        baseString = "a\uD4DB";
1195        testString = "a\u1111\u1171\u11B6";
1196        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1197        mat = pat.matcher(testString);
1198        assertTrue(mat.matches());
1199
1200        baseString = "a\uD21E";
1201        testString = "a\u1110\u116D\u11B5";
1202        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1203        mat = pat.matcher(testString);
1204        assertTrue(mat.matches());
1205
1206        baseString = "\u1111\u1171\u11B6cdb(ac\u1110\u116D\u11B5)";
1207        testString = "\uD4DBcdbac\uD21E";
1208        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1209        mat = pat.matcher(testString);
1210        assertTrue(mat.matches());
1211
1212        baseString = "cdb(?:\u1111\u1171\u11B6\uD21E)";
1213        testString = "cdb\uD4DB\u1110\u116D\u11B5";
1214        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1215        mat = pat.matcher(testString);
1216        assertTrue(mat.matches());
1217
1218        baseString = "\uD4DB[a-c]\u1110\u116D\u11B5cdb(ac)";
1219        testString = "\uD4DBb\uD21Ecdbac";
1220        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1221        mat = pat.matcher(testString);
1222        assertTrue(mat.matches());
1223
1224        baseString = "\uD4DB|\u00CCcdb(ac)";
1225        testString = "\u1111\u1171\u11B6";
1226        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1227        mat = pat.matcher(testString);
1228        assertTrue(mat.matches());
1229
1230        baseString = "\uD4DB|\u00CCcdb(ac)";
1231        testString = "\u1111\u1171";
1232        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1233        mat = pat.matcher(testString);
1234        assertFalse(mat.matches());
1235
1236        baseString = "\u00CC?cdb(ac)*(\uD4DB)*[a-c]";
1237        testString = "cdb\u1111\u1171\u11B6b";
1238        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1239        mat = pat.matcher(testString);
1240        assertTrue(mat.matches());
1241
1242        baseString = "\uD4DB";
1243        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1244        mat = pat.matcher("a\u1111\u1171\u11B6a");
1245        assertTrue(mat.find());
1246
1247        baseString = "\u1111";
1248        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1249        mat = pat.matcher("bcda\uD4DBr");
1250        assertFalse(mat.find());
1251    }
1252
1253    public void testIndexesCanonicalEq() {
1254        String baseString;
1255        String testString;
1256        Pattern pat;
1257        Matcher mat;
1258
1259        baseString = "\uD4DB";
1260        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1261        mat = pat.matcher("bcda\u1111\u1171\u11B6awr");
1262        assertTrue(mat.find());
1263        assertEquals(mat.start(), 4);
1264        assertEquals(mat.end(), 7);
1265
1266        baseString = "\uD4DB\u1111\u1171\u11B6";
1267        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1268        mat = pat.matcher("bcda\u1111\u1171\u11B6\uD4DBawr");
1269        assertTrue(mat.find());
1270        assertEquals(mat.start(), 4);
1271        assertEquals(mat.end(), 8);
1272
1273        baseString = "\uD4DB\uD21E\u1110\u1170";
1274        testString = "abcabc\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac";
1275        pat = Pattern.compile(baseString, Pattern.CANON_EQ);
1276        mat = pat.matcher(testString);
1277        assertTrue(mat.find());
1278        assertEquals(mat.start(), 6);
1279        assertEquals(mat.end(), 13);
1280    }
1281
1282    public void testCanonEqFlagWithSupplementaryCharacters() {
1283
1284        /*
1285         * \u1D1BF->\u1D1BB\u1D16F->\u1D1B9\u1D165\u1D16F in UTF32
1286         * \uD834\uDDBF->\uD834\uDDBB\uD834\uDD6F
1287         * ->\uD834\uDDB9\uD834\uDD65\uD834\uDD6F in UTF16
1288         */
1289        String patString = "abc\uD834\uDDBFef";
1290        String testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
1291        Pattern pat = Pattern.compile(patString, Pattern.CANON_EQ);
1292        Matcher mat = pat.matcher(testString);
1293        assertTrue(mat.matches());
1294
1295        testString = "abc\uD834\uDDBB\uD834\uDD6Fef";
1296        mat = pat.matcher(testString);
1297        assertTrue(mat.matches());
1298
1299        patString = "abc\uD834\uDDBB\uD834\uDD6Fef";
1300        testString = "abc\uD834\uDDBFef";
1301        pat = Pattern.compile(patString, Pattern.CANON_EQ);
1302        mat = pat.matcher(testString);
1303        assertTrue(mat.matches());
1304
1305        testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
1306        mat = pat.matcher(testString);
1307        assertTrue(mat.matches());
1308
1309        patString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
1310        testString = "abc\uD834\uDDBFef";
1311        pat = Pattern.compile(patString, Pattern.CANON_EQ);
1312        mat = pat.matcher(testString);
1313        assertTrue(mat.matches());
1314
1315        testString = "abc\uD834\uDDBB\uD834\uDD6Fef";
1316        mat = pat.matcher(testString);
1317        assertTrue(mat.matches());
1318
1319        /*
1320         * testSupplementary characters with no decomposition
1321         */
1322        patString = "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef";
1323        testString = "a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef";
1324        pat = Pattern.compile(patString, Pattern.CANON_EQ);
1325        mat = pat.matcher(testString);
1326        assertTrue(mat.matches());
1327    }
1328
1329    public void testRangesWithSurrogatesSupplementary() {
1330        String patString = "[abc\uD8D2]";
1331        String testString = "\uD8D2";
1332        Pattern pat = Pattern.compile(patString);
1333        Matcher mat = pat.matcher(testString);
1334        assertTrue(mat.matches());
1335
1336        testString = "a";
1337        mat = pat.matcher(testString);
1338        assertTrue(mat.matches());
1339
1340        testString = "ef\uD8D2\uDD71gh";
1341        mat = pat.matcher(testString);
1342        assertFalse(mat.find());
1343
1344        testString = "ef\uD8D2gh";
1345        mat = pat.matcher(testString);
1346        assertTrue(mat.find());
1347
1348        patString = "[abc\uD8D3&&[c\uD8D3]]";
1349        testString = "c";
1350        pat = Pattern.compile(patString);
1351        mat = pat.matcher(testString);
1352        assertTrue(mat.matches());
1353
1354        testString = "a";
1355        mat = pat.matcher(testString);
1356        assertFalse(mat.matches());
1357
1358        testString = "ef\uD8D3\uDD71gh";
1359        mat = pat.matcher(testString);
1360        assertFalse(mat.find());
1361
1362        testString = "ef\uD8D3gh";
1363        mat = pat.matcher(testString);
1364        assertTrue(mat.find());
1365
1366        patString = "[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]";
1367        testString = "c";
1368        pat = Pattern.compile(patString);
1369        mat = pat.matcher(testString);
1370        assertTrue(mat.matches());
1371
1372        testString = "\uDBEE\uDF0C";
1373        mat = pat.matcher(testString);
1374        assertTrue(mat.matches());
1375
1376        testString = "ef\uD8D3\uDD71gh";
1377        mat = pat.matcher(testString);
1378        assertFalse(mat.find());
1379
1380        testString = "ef\uD8D3gh";
1381        mat = pat.matcher(testString);
1382        assertTrue(mat.find());
1383
1384        patString = "[abc\uDBFC]\uDDC2cd";
1385        testString = "\uDBFC\uDDC2cd";
1386        pat = Pattern.compile(patString);
1387        mat = pat.matcher(testString);
1388        assertFalse(mat.matches());
1389
1390        testString = "a\uDDC2cd";
1391        mat = pat.matcher(testString);
1392        assertTrue(mat.matches());
1393    }
1394
1395    public void testSequencesWithSurrogatesSupplementary() {
1396        String patString = "abcd\uD8D3";
1397        String testString = "abcd\uD8D3\uDFFC";
1398        Pattern pat = Pattern.compile(patString);
1399        Matcher mat = pat.matcher(testString);
1400        assertFalse(mat.find());
1401
1402        testString = "abcd\uD8D3abc";
1403        mat = pat.matcher(testString);
1404        assertTrue(mat.find());
1405
1406        patString = "ab\uDBEFcd";
1407        testString = "ab\uDBEFcd";
1408        pat = Pattern.compile(patString);
1409        mat = pat.matcher(testString);
1410        assertTrue(mat.matches());
1411
1412        patString = "\uDFFCabcd";
1413        testString = "\uD8D3\uDFFCabcd";
1414        pat = Pattern.compile(patString);
1415        mat = pat.matcher(testString);
1416        assertFalse(mat.find());
1417
1418        testString = "abc\uDFFCabcdecd";
1419        mat = pat.matcher(testString);
1420        assertTrue(mat.find());
1421
1422        patString = "\uD8D3\uDFFCabcd";
1423        testString = "abc\uD8D3\uD8D3\uDFFCabcd";
1424        pat = Pattern.compile(patString);
1425        mat = pat.matcher(testString);
1426        assertTrue(mat.find());
1427    }
1428
1429    public void testPredefinedClassesWithSurrogatesSupplementary() {
1430        String patString = "[123\\D]";
1431        String testString = "a";
1432        Pattern pat = Pattern.compile(patString);
1433        Matcher mat = pat.matcher(testString);
1434        assertTrue(mat.find());
1435
1436        testString = "5";
1437        mat = pat.matcher(testString);
1438        assertFalse(mat.find());
1439
1440        testString = "3";
1441        mat = pat.matcher(testString);
1442        assertTrue(mat.find());
1443
1444        // low surrogate
1445        testString = "\uDFC4";
1446        mat = pat.matcher(testString);
1447        assertTrue(mat.find());
1448
1449        // high surrogate
1450        testString = "\uDADA";
1451        mat = pat.matcher(testString);
1452        assertTrue(mat.find());
1453
1454        testString = "\uDADA\uDFC4";
1455        mat = pat.matcher(testString);
1456        assertTrue(mat.find());
1457
1458        patString = "[123[^\\p{javaDigit}]]";
1459        testString = "a";
1460        pat = Pattern.compile(patString);
1461        mat = pat.matcher(testString);
1462        assertTrue(mat.find());
1463
1464        testString = "5";
1465        mat = pat.matcher(testString);
1466        assertFalse(mat.find());
1467
1468        testString = "3";
1469        mat = pat.matcher(testString);
1470        assertTrue(mat.find());
1471
1472        // low surrogate
1473        testString = "\uDFC4";
1474        mat = pat.matcher(testString);
1475        assertTrue(mat.find());
1476
1477        // high surrogate
1478        testString = "\uDADA";
1479        mat = pat.matcher(testString);
1480        assertTrue(mat.find());
1481
1482        testString = "\uDADA\uDFC4";
1483        mat = pat.matcher(testString);
1484        assertTrue(mat.find());
1485
1486        // surrogate characters
1487        patString = "\\p{Cs}";
1488        testString = "\uD916\uDE27";
1489        pat = Pattern.compile(patString);
1490        mat = pat.matcher(testString);
1491
1492        /*
1493         * see http://www.unicode.org/reports/tr18/#Supplementary_Characters we
1494         * have to treat text as code points not code units. \\p{Cs} matches any
1495         * surrogate character but here testString is a one code point
1496         * consisting of two code units (two surrogate characters) so we find
1497         * nothing
1498         */
1499        assertFalse(mat.find());
1500
1501        // swap low and high surrogates
1502        testString = "\uDE27\uD916";
1503        mat = pat.matcher(testString);
1504        assertTrue(mat.find());
1505
1506        patString = "[\uD916\uDE271\uD91623&&[^\\p{Cs}]]";
1507        testString = "1";
1508        pat = Pattern.compile(patString);
1509        mat = pat.matcher(testString);
1510        assertTrue(mat.find());
1511
1512        testString = "\uD916";
1513        pat = Pattern.compile(patString);
1514        mat = pat.matcher(testString);
1515        assertFalse(mat.find());
1516
1517        testString = "\uD916\uDE27";
1518        pat = Pattern.compile(patString);
1519        mat = pat.matcher(testString);
1520        assertTrue(mat.find());
1521
1522        // \uD9A0\uDE8E=\u7828E
1523        // \u78281=\uD9A0\uDE81
1524        patString = "[a-\uD9A0\uDE8E]";
1525        testString = "\uD9A0\uDE81";
1526        pat = Pattern.compile(patString);
1527        mat = pat.matcher(testString);
1528        assertTrue(mat.matches());
1529    }
1530
1531    public void testDotConstructionWithSurrogatesSupplementary() {
1532        String patString = ".";
1533        String testString = "\uD9A0\uDE81";
1534        Pattern pat = Pattern.compile(patString);
1535        Matcher mat = pat.matcher(testString);
1536        assertTrue(mat.matches());
1537
1538        testString = "\uDE81";
1539        mat = pat.matcher(testString);
1540        assertTrue(mat.matches());
1541
1542        testString = "\uD9A0";
1543        mat = pat.matcher(testString);
1544        assertTrue(mat.matches());
1545
1546        testString = "\n";
1547        mat = pat.matcher(testString);
1548        assertFalse(mat.matches());
1549
1550        patString = ".*\uDE81";
1551        testString = "\uD9A0\uDE81\uD9A0\uDE81\uD9A0\uDE81";
1552        pat = Pattern.compile(patString);
1553        mat = pat.matcher(testString);
1554        assertFalse(mat.matches());
1555
1556        testString = "\uD9A0\uDE81\uD9A0\uDE81\uDE81";
1557        mat = pat.matcher(testString);
1558        assertTrue(mat.matches());
1559
1560        patString = ".*";
1561        testString = "\uD9A0\uDE81\n\uD9A0\uDE81\uD9A0\n\uDE81";
1562        pat = Pattern.compile(patString, Pattern.DOTALL);
1563        mat = pat.matcher(testString);
1564        assertTrue(mat.matches());
1565    }
1566
1567    public void testQuantifiersWithSurrogatesSupplementary() {
1568        String patString = "\uD9A0\uDE81*abc";
1569        String testString = "\uD9A0\uDE81\uD9A0\uDE81abc";
1570        Pattern pat = Pattern.compile(patString);
1571        Matcher mat = pat.matcher(testString);
1572        assertTrue(mat.matches());
1573
1574        testString = "abc";
1575        mat = pat.matcher(testString);
1576        assertTrue(mat.matches());
1577    }
1578
1579    public void testAlternationsWithSurrogatesSupplementary() {
1580        String patString = "\uDE81|\uD9A0\uDE81|\uD9A0";
1581        String testString = "\uD9A0";
1582        Pattern pat = Pattern.compile(patString);
1583        Matcher mat = pat.matcher(testString);
1584        assertTrue(mat.matches());
1585
1586        testString = "\uDE81";
1587        mat = pat.matcher(testString);
1588        assertTrue(mat.matches());
1589
1590        testString = "\uD9A0\uDE81";
1591        mat = pat.matcher(testString);
1592        assertTrue(mat.matches());
1593
1594        testString = "\uDE81\uD9A0";
1595        mat = pat.matcher(testString);
1596        assertFalse(mat.matches());
1597    }
1598
1599    public void testGroupsWithSurrogatesSupplementary() {
1600
1601        // this pattern matches nothing
1602        String patString = "(\uD9A0)\uDE81";
1603        String testString = "\uD9A0\uDE81";
1604        Pattern pat = Pattern.compile(patString);
1605        Matcher mat = pat.matcher(testString);
1606        assertFalse(mat.matches());
1607
1608        patString = "(\uD9A0)";
1609        testString = "\uD9A0\uDE81";
1610        pat = Pattern.compile(patString, Pattern.DOTALL);
1611        mat = pat.matcher(testString);
1612        assertFalse(mat.find());
1613    }
1614
1615    /*
1616     * Regression test for HARMONY-688
1617     */
1618    public void testUnicodeCategoryWithSurrogatesSupplementary() {
1619        Pattern p = Pattern.compile("\\p{javaLowerCase}");
1620        Matcher matcher = p.matcher("\uD801\uDC28");
1621        assertTrue(matcher.find());
1622    }
1623
1624}
1625