1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package org.apache.harmony.tests.java.util.regex; 19 20import java.io.Serializable; 21import java.util.regex.Matcher; 22import java.util.regex.Pattern; 23import java.util.regex.PatternSyntaxException; 24 25import junit.framework.TestCase; 26 27import org.apache.harmony.testframework.serialization.SerializationTest; 28import org.apache.harmony.testframework.serialization.SerializationTest.SerializableAssert; 29 30@SuppressWarnings("nls") 31public class PatternTest extends TestCase { 32 String[] testPatterns = { 33 "(a|b)*abb", 34 "(1*2*3*4*)*567", 35 "(a|b|c|d)*aab", 36 "(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*", 37 "(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*", 38 "(a|b)*(a|b)*A(a|b)*lice.*", 39 "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|" 40 + "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do", 41 "x(?c)y", "x(?cc)y", "x(?:c)y" 42 43 }; 44 45 public PatternTest(String name) { 46 super(name); 47 } 48 49 public void testCommentsInPattern() { 50 Pattern p = Pattern.compile("ab# this is a comment\ncd", Pattern.COMMENTS); 51 assertTrue(p.matcher("abcd").matches()); 52 } 53 54 /* 55 * Class under test for String[] split(CharSequence, int) 56 */ 57 public void testSplitCharSequenceint() { 58 // splitting CharSequence which ends with pattern 59 // bug6193 60 assertEquals(",,".split(",", 3).length, 3); 61 assertEquals(",,".split(",", 4).length, 3); 62 // bug6193 63 // bug5391 64 assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5); 65 assertEquals(Pattern.compile("b").split("ab", -1).length, 2); 66 // bug5391 67 String s[]; 68 Pattern pat = Pattern.compile("x"); 69 s = pat.split("zxx:zzz:zxx", 10); 70 assertEquals(s.length, 5); 71 s = pat.split("zxx:zzz:zxx", 3); 72 assertEquals(s.length, 3); 73 s = pat.split("zxx:zzz:zxx", -1); 74 assertEquals(s.length, 5); 75 s = pat.split("zxx:zzz:zxx", 0); 76 assertEquals(s.length, 3); 77 // other splitting 78 // negative limit 79 pat = Pattern.compile("b"); 80 s = pat.split("abccbadfebb", -1); 81 assertEquals(s.length, 5); 82 s = pat.split("", -1); 83 assertEquals(s.length, 1); 84 pat = Pattern.compile(""); 85 s = pat.split("", -1); 86 assertEquals(s.length, 1); 87 s = pat.split("abccbadfe", -1); 88 assertEquals(s.length, 11); 89 // zero limit 90 pat = Pattern.compile("b"); 91 s = pat.split("abccbadfebb", 0); 92 assertEquals(s.length, 3); 93 s = pat.split("", 0); 94 assertEquals(s.length, 1); 95 pat = Pattern.compile(""); 96 s = pat.split("", 0); 97 assertEquals(s.length, 1); 98 s = pat.split("abccbadfe", 0); 99 assertEquals(s.length, 10); 100 // positive limit 101 pat = Pattern.compile("b"); 102 s = pat.split("abccbadfebb", 12); 103 assertEquals(s.length, 5); 104 s = pat.split("", 6); 105 assertEquals(s.length, 1); 106 pat = Pattern.compile(""); 107 s = pat.split("", 11); 108 assertEquals(s.length, 1); 109 s = pat.split("abccbadfe", 15); 110 assertEquals(s.length, 11); 111 112 pat = Pattern.compile("b"); 113 s = pat.split("abccbadfebb", 5); 114 assertEquals(s.length, 5); 115 s = pat.split("", 1); 116 assertEquals(s.length, 1); 117 pat = Pattern.compile(""); 118 s = pat.split("", 1); 119 assertEquals(s.length, 1); 120 s = pat.split("abccbadfe", 11); 121 assertEquals(s.length, 11); 122 123 pat = Pattern.compile("b"); 124 s = pat.split("abccbadfebb", 3); 125 assertEquals(s.length, 3); 126 pat = Pattern.compile(""); 127 s = pat.split("abccbadfe", 5); 128 assertEquals(s.length, 5); 129 } 130 131 /* 132 * Class under test for String[] split(CharSequence) 133 */ 134 public void testSplitCharSequence() { 135 String s[]; 136 Pattern pat = Pattern.compile("b"); 137 s = pat.split("abccbadfebb"); 138 assertEquals(s.length, 3); 139 s = pat.split(""); 140 assertEquals(s.length, 1); 141 pat = Pattern.compile(""); 142 s = pat.split(""); 143 assertEquals(s.length, 1); 144 s = pat.split("abccbadfe"); 145 assertEquals(s.length, 10); 146 // bug6544 147 String s1 = ""; 148 String[] arr = s1.split(":"); 149 assertEquals(arr.length, 1); 150 // bug6544 151 } 152 153 public void testPattern() { 154 } 155 156 public void testFlags() { 157 String baseString; 158 String testString; 159 Pattern pat; 160 Matcher mat; 161 162 baseString = "((?i)|b)a"; 163 testString = "A"; 164 pat = Pattern.compile(baseString); 165 mat = pat.matcher(testString); 166 assertFalse(mat.matches()); 167 168 baseString = "(?i)a|b"; 169 testString = "A"; 170 pat = Pattern.compile(baseString); 171 mat = pat.matcher(testString); 172 assertTrue(mat.matches()); 173 174 baseString = "(?i)a|b"; 175 testString = "B"; 176 pat = Pattern.compile(baseString); 177 mat = pat.matcher(testString); 178 assertTrue(mat.matches()); 179 180 baseString = "c|(?i)a|b"; 181 testString = "B"; 182 pat = Pattern.compile(baseString); 183 mat = pat.matcher(testString); 184 assertTrue(mat.matches()); 185 186 baseString = "(?i)a|(?s)b"; 187 testString = "B"; 188 pat = Pattern.compile(baseString); 189 mat = pat.matcher(testString); 190 assertTrue(mat.matches()); 191 192 baseString = "(?i)a|(?-i)b"; 193 testString = "B"; 194 pat = Pattern.compile(baseString); 195 mat = pat.matcher(testString); 196 assertFalse(mat.matches()); 197 198 baseString = "(?i)a|(?-i)c|b"; 199 testString = "B"; 200 pat = Pattern.compile(baseString); 201 mat = pat.matcher(testString); 202 assertFalse(mat.matches()); 203 204 baseString = "(?i)a|(?-i)c|(?i)b"; 205 testString = "B"; 206 pat = Pattern.compile(baseString); 207 mat = pat.matcher(testString); 208 assertTrue(mat.matches()); 209 210 baseString = "(?i)a|(?-i)b"; 211 testString = "A"; 212 pat = Pattern.compile(baseString); 213 mat = pat.matcher(testString); 214 assertTrue(mat.matches()); 215 216 baseString = "((?i))a"; 217 testString = "A"; 218 pat = Pattern.compile(baseString); 219 mat = pat.matcher(testString); 220 assertFalse(mat.matches()); 221 222 baseString = "|(?i)|a"; 223 testString = "A"; 224 pat = Pattern.compile(baseString); 225 mat = pat.matcher(testString); 226 assertTrue(mat.matches()); 227 228 baseString = "(?i)((?s)a.)"; 229 testString = "A\n"; 230 pat = Pattern.compile(baseString); 231 mat = pat.matcher(testString); 232 assertTrue(mat.matches()); 233 234 baseString = "(?i)((?-i)a)"; 235 testString = "A"; 236 pat = Pattern.compile(baseString); 237 mat = pat.matcher(testString); 238 assertFalse(mat.matches()); 239 240 baseString = "(?i)(?s:a.)"; 241 testString = "A\n"; 242 pat = Pattern.compile(baseString); 243 mat = pat.matcher(testString); 244 assertTrue(mat.matches()); 245 246 baseString = "(?i)fgh(?s:aa)"; 247 testString = "fghAA"; 248 pat = Pattern.compile(baseString); 249 mat = pat.matcher(testString); 250 assertTrue(mat.matches()); 251 252 baseString = "(?i)((?-i))a"; 253 testString = "A"; 254 pat = Pattern.compile(baseString); 255 mat = pat.matcher(testString); 256 assertTrue(mat.matches()); 257 258 baseString = "abc(?i)d"; 259 testString = "ABCD"; 260 pat = Pattern.compile(baseString); 261 mat = pat.matcher(testString); 262 assertFalse(mat.matches()); 263 264 testString = "abcD"; 265 mat = pat.matcher(testString); 266 assertTrue(mat.matches()); 267 268 baseString = "a(?i)a(?-i)a(?i)a(?-i)a"; 269 testString = "aAaAa"; 270 pat = Pattern.compile(baseString); 271 mat = pat.matcher(testString); 272 assertTrue(mat.matches()); 273 274 testString = "aAAAa"; 275 mat = pat.matcher(testString); 276 assertFalse(mat.matches()); 277 } 278 279 public void testFlagsMethod() { 280 String baseString; 281 Pattern pat; 282 283 /* 284 * These tests are for compatibility with RI only. Logically we have to 285 * return only flags specified during the compilation. For example 286 * pat.flags() == 0 when we compile Pattern pat = 287 * Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled 288 * in a case insensitive manner. So there is little sense to do calls to 289 * flags() now. 290 */ 291 baseString = "(?-i)"; 292 pat = Pattern.compile(baseString); 293 294 baseString = "(?idmsux)abc(?-i)vg(?-dmu)"; 295 pat = Pattern.compile(baseString); 296 assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS); 297 298 baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)"; 299 pat = Pattern.compile(baseString); 300 assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS); 301 302 baseString = "(?is)a((?x)b.)"; 303 pat = Pattern.compile(baseString); 304 assertEquals(pat.flags(), Pattern.DOTALL | Pattern.CASE_INSENSITIVE); 305 306 baseString = "(?i)a((?-i))"; 307 pat = Pattern.compile(baseString); 308 assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE); 309 310 baseString = "((?i)a)"; 311 pat = Pattern.compile(baseString); 312 assertEquals(pat.flags(), 0); 313 314 pat = Pattern.compile("(?is)abc"); 315 assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL); 316 } 317 318 /* 319 * Class under test for Pattern compile(String, int) 320 */ 321 public void testCompileStringint() { 322 /* 323 * this tests are needed to verify that appropriate exceptions are 324 * thrown 325 */ 326 String pattern = "b)a"; 327 try { 328 Pattern.compile(pattern); 329 fail("Expected a PatternSyntaxException when compiling pattern: " 330 + pattern); 331 } catch (PatternSyntaxException e) { 332 // pass 333 } 334 pattern = "bcde)a"; 335 try { 336 Pattern.compile(pattern); 337 fail("Expected a PatternSyntaxException when compiling pattern: " 338 + pattern); 339 } catch (PatternSyntaxException e) { 340 // pass 341 } 342 pattern = "bbg())a"; 343 try { 344 Pattern.compile(pattern); 345 fail("Expected a PatternSyntaxException when compiling pattern: " 346 + pattern); 347 } catch (PatternSyntaxException e) { 348 // pass 349 } 350 351 pattern = "cdb(?i))a"; 352 try { 353 Pattern.compile(pattern); 354 fail("Expected a PatternSyntaxException when compiling pattern: " 355 + pattern); 356 } catch (PatternSyntaxException e) { 357 // pass 358 } 359 360 /* 361 * This pattern should compile - HARMONY-2127 362 */ 363 pattern = "x(?c)y"; 364 Pattern.compile(pattern); 365 366 /* 367 * this pattern doesn't match any string, but should be compiled anyway 368 */ 369 pattern = "(b\\1)a"; 370 Pattern.compile(pattern); 371 } 372 373 /* 374 * Class under test for Pattern compile(String) 375 */ 376 public void testQuantCompileNeg() { 377 String[] patterns = { "5{,2}", "{5asd", "{hgdhg", "{5,hjkh", "{,5hdsh", 378 "{5,3shdfkjh}" }; 379 for (String element : patterns) { 380 try { 381 Pattern.compile(element); 382 fail("PatternSyntaxException was expected, but compilation succeeds"); 383 } catch (PatternSyntaxException pse) { 384 continue; 385 } 386 } 387 // Regression for HARMONY-1365 388 String pattern = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\G*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\B*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]*+)|(?x-xd:^{5}+)()"; 389 assertNotNull(Pattern.compile(pattern)); 390 } 391 392 public void testQuantCompilePos() { 393 String[] patterns = {/* "(abc){1,3}", */"abc{2,}", "abc{5}" }; 394 for (String element : patterns) { 395 Pattern.compile(element); 396 } 397 } 398 399 public void testQuantComposition() { 400 String pattern = "(a{1,3})aab"; 401 java.util.regex.Pattern pat = java.util.regex.Pattern.compile(pattern); 402 java.util.regex.Matcher mat = pat.matcher("aaab"); 403 mat.matches(); 404 mat.start(1); 405 mat.group(1); 406 } 407 408 public void testMatches() { 409 String[][] posSeq = { 410 { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" }, 411 { "213567", "12324567", "1234567", "213213567", 412 "21312312312567", "444444567" }, 413 { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" }, 414 { "213234567", "3458", "0987654", "7689546432", "0398576", 415 "98432", "5" }, 416 { 417 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", 418 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 419 + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" }, 420 { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa", 421 "abbbAbbbliceaaa", "Alice" }, 422 { "a123", "bnxnvgds156", "for", "while", "if", "struct" }, 423 { "xy" }, { "xy" }, { "xcy" } 424 425 }; 426 427 for (int i = 0; i < testPatterns.length; i++) { 428 for (int j = 0; j < posSeq[i].length; j++) { 429 assertTrue("Incorrect match: " + testPatterns[i] + " vs " 430 + posSeq[i][j], Pattern.matches(testPatterns[i], 431 posSeq[i][j])); 432 } 433 } 434 } 435 436 public void testTimeZoneIssue() { 437 Pattern p = Pattern.compile("GMT(\\+|\\-)(\\d+)(:(\\d+))?"); 438 Matcher m = p.matcher("GMT-9:45"); 439 assertTrue(m.matches()); 440 assertEquals("-", m.group(1)); 441 assertEquals("9", m.group(2)); 442 assertEquals(":45", m.group(3)); 443 assertEquals("45", m.group(4)); 444 } 445 446 public void testCompileRanges() { 447 String[] correctTestPatterns = { "[^]*abb]*", "[^a-d[^m-p]]*abb", 448 "[a-d\\d]*abb", "[abc]*abb", "[a-e&&[de]]*abb", "[^abc]*abb", 449 "[a-e&&[^de]]*abb", "[a-z&&[^m-p]]*abb", "[a-d[m-p]]*abb", 450 "[a-zA-Z]*abb", "[+*?]*abb", "[^+*?]*abb" }; 451 452 String[] inputSecuence = { "kkkk", "admpabb", "abcabcd124654abb", 453 "abcabccbacababb", "dededededededeedabb", "gfdhfghgdfghabb", 454 "accabacbcbaabb", "acbvfgtyabb", "adbcacdbmopabcoabb", 455 "jhfkjhaSDFGHJkdfhHNJMjkhfabb", "+*??+*abb", "sdfghjkabb" }; 456 457 for (int i = 0; i < correctTestPatterns.length; i++) { 458 assertTrue("pattern: " + correctTestPatterns[i] + " input: " 459 + inputSecuence[i], Pattern.matches(correctTestPatterns[i], 460 inputSecuence[i])); 461 462 } 463 464 String[] wrongInputSecuence = { "]", "admpkk", "abcabcd124k654abb", 465 "abwcabccbacababb", "abababdeababdeabb", "abcabcacbacbabb", 466 "acdcbecbaabb", "acbotyabb", "adbcaecdbmopabcoabb", 467 "jhfkjhaSDFGHJk;dfhHNJMjkhfabb", "+*?a?+*abb", "sdf+ghjkabb" }; 468 469 for (int i = 0; i < correctTestPatterns.length; i++) { 470 assertFalse("pattern: " + correctTestPatterns[i] + " input: " 471 + wrongInputSecuence[i], Pattern.matches( 472 correctTestPatterns[i], wrongInputSecuence[i])); 473 474 } 475 } 476 477 public void testRangesSpecialCases() { 478 String neg_patterns[] = { "[a-&&[b-c]]", "[a-\\w]", "[b-a]", "[]" }; 479 480 for (String element : neg_patterns) { 481 try { 482 Pattern.compile(element); 483 fail("PatternSyntaxException was expected: " + element); 484 } catch (PatternSyntaxException pse) { 485 } 486 } 487 488 String pos_patterns[] = { "[-]+", "----", "[a-]+", "a-a-a-a-aa--", 489 "[\\w-a]+", "123-2312--aaa-213", "[a-]]+", "-]]]]]]]]]]]]]]]" }; 490 491 for (int i = 0; i < pos_patterns.length; i++) { 492 String pat = pos_patterns[i++]; 493 String inp = pos_patterns[i]; 494 assertTrue("pattern: " + pat + " input: " + inp, Pattern.matches( 495 pat, inp)); 496 } 497 } 498 499 public void testZeroSymbols() { 500 assertTrue(Pattern.matches("[\0]*abb", "\0\0\0\0\0\0abb")); 501 } 502 503 public void testEscapes() { 504 Pattern pat = Pattern.compile("\\Q{]()*?"); 505 Matcher mat = pat.matcher("{]()*?"); 506 507 assertTrue(mat.matches()); 508 } 509 510 public void testRegressions() { 511 // Bug 181 512 Pattern.compile("[\\t-\\r]"); 513 514 // HARMONY-4472 515 Pattern.compile("a*.+"); 516 517 // Bug187 518 Pattern 519 .compile("|(?idmsux-idmsux)|(?idmsux-idmsux)|[^|\\[-\\0274|\\,-\\\\[^|W\\}\\nq\\x65\\002\\xFE\\05\\06\\00\\x66\\x47i\\,\\xF2\\=\\06\\u0EA4\\x9B\\x3C\\f\\|\\{\\xE5\\05\\r\\u944A\\xCA\\e|\\x19\\04\\x07\\04\\u607B\\023\\0073\\x91Tr\\0150\\x83]]?(?idmsux-idmsux:\\p{Alpha}{7}?)||(?<=[^\\uEC47\\01\\02\\u3421\\a\\f\\a\\013q\\035w\\e])(?<=\\p{Punct}{0,}?)(?=^\\p{Lower})(?!\\b{8,14})(?<![|\\00-\\0146[^|\\04\\01\\04\\060\\f\\u224DO\\x1A\\xC4\\00\\02\\0315\\0351\\u84A8\\xCBt\\xCC\\06|\\0141\\00\\=\\e\\f\\x6B\\0026Tb\\040\\x76xJ&&[\\\\-\\]\\05\\07\\02\\u2DAF\\t\\x9C\\e\\0023\\02\\,X\\e|\\u6058flY\\u954C]]]{5}?)(?<=\\p{Sc}{8}+)[^|\\026-\\u89BA|o\\u6277\\t\\07\\x50&&\\p{Punct}]{8,14}+((?<=^\\p{Punct})|(?idmsux-idmsux)||(?>[\\x3E-\\]])|(?idmsux-idmsux:\\p{Punct})|(?<![\\0111\\0371\\xDF\\u6A49\\07\\u2A4D\\00\\0212\\02Xd-\\xED[^\\a-\\0061|\\0257\\04\\f\\[\\0266\\043\\03\\x2D\\042&&[^\\f-\\]&&\\s]]])|(?>[|\\n\\042\\uB09F\\06\\u0F2B\\uC96D\\x89\\uC166\\xAA|\\04-\\][^|\\a\\|\\rx\\04\\uA770\\n\\02\\t\\052\\056\\0274\\|\\=\\07\\e|\\00-\\x1D&&[^\\005\\uB15B\\uCDAC\\n\\x74\\0103\\0147\\uD91B\\n\\062G\\u9B4B\\077\\}\\0324&&[^\\0302\\,\\0221\\04\\u6D16\\04xy\\uD193\\[\\061\\06\\045\\x0F|\\e\\xBB\\f\\u1B52\\023\\u3AD2\\033\\007\\022\\}\\x66\\uA63FJ-\\0304]]]]{0,0})||(?<![^|\\0154U\\u0877\\03\\fy\\n\\|\\0147\\07-\\=[|q\\u69BE\\0243\\rp\\053\\02\\x33I\\u5E39\\u9C40\\052-\\xBC[|\\0064-\\?|\\uFC0C\\x30\\0060\\x45\\\\\\02\\?p\\xD8\\0155\\07\\0367\\04\\uF07B\\000J[^|\\0051-\\{|\\u9E4E\\u7328\\]\\u6AB8\\06\\x71\\a\\]\\e\\|KN\\u06AA\\0000\\063\\u2523&&[\\005\\0277\\x41U\\034\\}R\\u14C7\\u4767\\x09\\n\\054Ev\\0144\\<\\f\\,Q-\\xE4]]]]]{3}+)|(?>^+)|(?![^|\\|\\nJ\\t\\<\\04E\\\\\\t\\01\\\\\\02\\|\\=\\}\\xF3\\uBEC2\\032K\\014\\uCC5F\\072q\\|\\0153\\xD9\\0322\\uC6C8[^\\t\\0342\\x34\\x91\\06\\{\\xF1\\a\\u1710\\?\\xE7\\uC106\\02pF\\<&&[^|\\]\\064\\u381D\\u50CF\\eO&&[^|\\06\\x2F\\04\\045\\032\\u8536W\\0377\\0017|\\x06\\uE5FA\\05\\xD4\\020\\04c\\xFC\\02H\\x0A\\r]]]]+?)(?idmsux-idmsux)|(?<![|\\r-\\,&&[I\\t\\r\\0201\\xDB\\e&&[^|\\02\\06\\00\\<\\a\\u7952\\064\\051\\073\\x41\\?n\\040\\0053\\031&&[\\x15-\\|]]]]{8,11}?)(?![^|\\<-\\uA74B\\xFA\\u7CD2\\024\\07n\\<\\x6A\\0042\\uE4FF\\r\\u896B\\[\\=\\042Y&&^\\p{ASCII}]++)|(?<![R-\\|&&[\\a\\0120A\\u6145\\<\\050-d[|\\e-\\uA07C|\\016-\\u80D9]]]{1,}+)|(?idmsux-idmsux)|(?idmsux-idmsux)|(?idmsux-idmsux:\\B{6,}?)|(?<=\\D{5,8}?)|(?>[\\{-\\0207|\\06-\\0276\\p{XDigit}])(?idmsux-idmsux:[^|\\x52\\0012\\]u\\xAD\\0051f\\0142\\\\l\\|\\050\\05\\f\\t\\u7B91\\r\\u7763\\{|h\\0104\\a\\f\\0234\\u2D4F&&^\\P{InGreek}]))"); 520 // HARMONY-5858 521 Pattern.compile("\\u6211", Pattern.LITERAL); 522 } 523 524 public void testOrphanQuantifiers() { 525 try { 526 Pattern.compile("+++++"); 527 fail("PatternSyntaxException expected"); 528 } catch (PatternSyntaxException pse) { 529 } 530 } 531 532 public void testOrphanQuantifiers2() { 533 try { 534 Pattern.compile("\\d+*"); 535 fail("PatternSyntaxException expected"); 536 } catch (PatternSyntaxException pse) { 537 } 538 } 539 540 public void testBug197() { 541 Object[] vals = { ":", new Integer(2), 542 new String[] { "boo", "and:foo" }, ":", new Integer(5), 543 new String[] { "boo", "and", "foo" }, ":", new Integer(-2), 544 new String[] { "boo", "and", "foo" }, ":", new Integer(3), 545 new String[] { "boo", "and", "foo" }, ":", new Integer(1), 546 new String[] { "boo:and:foo" }, "o", new Integer(5), 547 new String[] { "b", "", ":and:f", "", "" }, "o", 548 new Integer(4), new String[] { "b", "", ":and:f", "o" }, "o", 549 new Integer(-2), new String[] { "b", "", ":and:f", "", "" }, 550 "o", new Integer(0), new String[] { "b", "", ":and:f" } }; 551 552 for (int i = 0; i < vals.length / 3;) { 553 String[] res = Pattern.compile(vals[i++].toString()).split( 554 "boo:and:foo", ((Integer) vals[i++]).intValue()); 555 String[] expectedRes = (String[]) vals[i++]; 556 557 assertEquals(expectedRes.length, res.length); 558 559 for (int j = 0; j < expectedRes.length; j++) { 560 assertEquals(expectedRes[j], res[j]); 561 } 562 } 563 } 564 565 public void testURIPatterns() { 566 String URI_REGEXP_STR = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; 567 String SCHEME_REGEXP_STR = "^[a-zA-Z]{1}[\\w+-.]+$"; 568 String REL_URI_REGEXP_STR = "^(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; 569 String IPV6_REGEXP_STR = "^[0-9a-fA-F\\:\\.]+(\\%\\w+)?$"; 570 String IPV6_REGEXP_STR2 = "^\\[[0-9a-fA-F\\:\\.]+(\\%\\w+)?\\]$"; 571 String IPV4_REGEXP_STR = "^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$"; 572 String HOSTNAME_REGEXP_STR = "\\w+[\\w\\-\\.]*"; 573 574 Pattern.compile(URI_REGEXP_STR); 575 Pattern.compile(REL_URI_REGEXP_STR); 576 Pattern.compile(SCHEME_REGEXP_STR); 577 Pattern.compile(IPV4_REGEXP_STR); 578 Pattern.compile(IPV6_REGEXP_STR); 579 Pattern.compile(IPV6_REGEXP_STR2); 580 Pattern.compile(HOSTNAME_REGEXP_STR); 581 } 582 583 public void testFindBoundaryCases1() { 584 Pattern pat = Pattern.compile(".*\n"); 585 Matcher mat = pat.matcher("a\n"); 586 587 mat.find(); 588 assertEquals("a\n", mat.group()); 589 590 } 591 592 public void testFindBoundaryCases2() { 593 Pattern pat = Pattern.compile(".*A"); 594 Matcher mat = pat.matcher("aAa"); 595 596 mat.find(); 597 assertEquals("aA", mat.group()); 598 599 } 600 601 public void testFindBoundaryCases3() { 602 Pattern pat = Pattern.compile(".*A"); 603 Matcher mat = pat.matcher("a\naA\n"); 604 605 mat.find(); 606 assertEquals("aA", mat.group()); 607 608 } 609 610 public void testFindBoundaryCases4() { 611 Pattern pat = Pattern.compile("A.*"); 612 Matcher mat = pat.matcher("A\n"); 613 614 mat.find(); 615 assertEquals("A", mat.group()); 616 617 } 618 619 public void testFindBoundaryCases5() { 620 Pattern pat = Pattern.compile(".*A.*"); 621 Matcher mat = pat.matcher("\nA\naaa\nA\naaAaa\naaaA\n"); 622 // Matcher mat = pat.matcher("\nA\n"); 623 String[] res = { "A", "A", "aaAaa", "aaaA" }; 624 int k = 0; 625 for (; mat.find(); k++) { 626 assertEquals(res[k], mat.group()); 627 } 628 } 629 630 public void testFindBoundaryCases6() { 631 String[] res = { "", "a", "", "" }; 632 Pattern pat = Pattern.compile(".*"); 633 Matcher mat = pat.matcher("\na\n"); 634 int k = 0; 635 636 for (; mat.find(); k++) { 637 assertEquals(res[k], mat.group()); 638 } 639 } 640 641 public void _testFindBoundaryCases7() { 642 Pattern pat = Pattern.compile(".*"); 643 Matcher mat = pat.matcher("\na\n"); 644 int k = 0; 645 646 for (; mat.find(); k++) { 647 System.out.println(mat.group()); 648 System.out.flush(); 649 } 650 } 651 652 public void testBackReferences() { 653 Pattern pat = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))"); 654 Matcher mat = pat 655 .matcher("(start1: word :start1)(start2: word :start2)"); 656 int k = 1; 657 for (; mat.find(); k++) { 658 assertEquals("start" + k, mat.group(2)); 659 assertEquals(" word ", mat.group(3)); 660 assertEquals("start" + k, mat.group(4)); 661 662 } 663 664 assertEquals(3, k); 665 pat = Pattern.compile(".*(.)\\1"); 666 mat = pat.matcher("saa"); 667 assertTrue(mat.matches()); 668 } 669 670 public void _testBackReferences1() { 671 Pattern pat = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))"); 672 Matcher mat = pat 673 .matcher("(start1: word :start1)(start2: word :start2)"); 674 int k = 1; 675 for (; mat.find(); k++) { 676 System.out.println(mat.group(2)); 677 System.out.println(mat.group(3)); 678 System.out.println(mat.group(4)); 679 680 } 681 682 assertEquals(3, k); 683 } 684 685 public void testNewLine() { 686 Pattern pat = Pattern.compile("(^$)*\n", Pattern.MULTILINE); 687 Matcher mat = pat.matcher("\r\n\n"); 688 int counter = 0; 689 while (mat.find()) { 690 counter++; 691 } 692 assertEquals(2, counter); 693 } 694 695 public void testFindGreedy() { 696 Pattern pat = Pattern.compile(".*aaa", Pattern.DOTALL); 697 Matcher mat = pat.matcher("aaaa\naaa\naaaaaa"); 698 mat.matches(); 699 assertEquals(15, mat.end()); 700 } 701 702 public void testSerialization() throws Exception { 703 Pattern pat = Pattern.compile("a*bc"); 704 SerializableAssert comparator = new SerializableAssert() { 705 public void assertDeserialized(Serializable initial, 706 Serializable deserialized) { 707 assertEquals(((Pattern) initial).toString(), 708 ((Pattern) deserialized).toString()); 709 } 710 }; 711 SerializationTest.verifyGolden(this, pat, comparator); 712 SerializationTest.verifySelf(pat, comparator); 713 } 714 715 public void testSOLQuant() { 716 Pattern pat = Pattern.compile("$*", Pattern.MULTILINE); 717 Matcher mat = pat.matcher("\n\n"); 718 int counter = 0; 719 while (mat.find()) { 720 counter++; 721 } 722 723 assertEquals(3, counter); 724 } 725 726 public void testIllegalEscape() { 727 try { 728 Pattern.compile("\\y"); 729 fail("PatternSyntaxException expected"); 730 } catch (PatternSyntaxException pse) { 731 } 732 } 733 734 public void testEmptyFamily() { 735 Pattern.compile("\\p{Lower}"); 736 } 737 738 public void testNonCaptConstr() { 739 // Flags 740 Pattern pat = Pattern.compile("(?i)b*(?-i)a*"); 741 assertTrue(pat.matcher("bBbBaaaa").matches()); 742 assertFalse(pat.matcher("bBbBAaAa").matches()); 743 744 // Non-capturing groups 745 pat = Pattern.compile("(?i:b*)a*"); 746 assertTrue(pat.matcher("bBbBaaaa").matches()); 747 assertFalse(pat.matcher("bBbBAaAa").matches()); 748 749 pat = Pattern 750 // 1 2 3 4 5 6 7 8 9 10 11 751 .compile("(?:-|(-?\\d+\\d\\d\\d))?(?:-|-(\\d\\d))?(?:-|-(\\d\\d))?(T)?(?:(\\d\\d):(\\d\\d):(\\d\\d)(\\.\\d+)?)?(?:(?:((?:\\+|\\-)\\d\\d):(\\d\\d))|(Z))?"); 752 Matcher mat = pat.matcher("-1234-21-31T41:51:61.789+71:81"); 753 assertTrue(mat.matches()); 754 assertEquals("-1234", mat.group(1)); 755 assertEquals("21", mat.group(2)); 756 assertEquals("31", mat.group(3)); 757 assertEquals("T", mat.group(4)); 758 assertEquals("41", mat.group(5)); 759 assertEquals("51", mat.group(6)); 760 assertEquals("61", mat.group(7)); 761 assertEquals(".789", mat.group(8)); 762 assertEquals("+71", mat.group(9)); 763 assertEquals("81", mat.group(10)); 764 765 // positive lookahead 766 pat = Pattern.compile(".*\\.(?=log$).*$"); 767 assertTrue(pat.matcher("a.b.c.log").matches()); 768 assertFalse(pat.matcher("a.b.c.log.").matches()); 769 770 // negative lookahead 771 pat = Pattern.compile(".*\\.(?!log$).*$"); 772 assertFalse(pat.matcher("abc.log").matches()); 773 assertTrue(pat.matcher("abc.logg").matches()); 774 775 // positive lookbehind 776 pat = Pattern.compile(".*(?<=abc)\\.log$"); 777 assertFalse(pat.matcher("cde.log").matches()); 778 assertTrue(pat.matcher("abc.log").matches()); 779 780 // negative lookbehind 781 pat = Pattern.compile(".*(?<!abc)\\.log$"); 782 assertTrue(pat.matcher("cde.log").matches()); 783 assertFalse(pat.matcher("abc.log").matches()); 784 785 // atomic group 786 pat = Pattern.compile("(?>a*)abb"); 787 assertFalse(pat.matcher("aaabb").matches()); 788 pat = Pattern.compile("(?>a*)bb"); 789 assertTrue(pat.matcher("aaabb").matches()); 790 791 pat = Pattern.compile("(?>a|aa)aabb"); 792 assertTrue(pat.matcher("aaabb").matches()); 793 pat = Pattern.compile("(?>aa|a)aabb"); 794 assertFalse(pat.matcher("aaabb").matches()); 795 796 // quantifiers over look ahead 797 pat = Pattern.compile(".*(?<=abc)*\\.log$"); 798 assertTrue(pat.matcher("cde.log").matches()); 799 pat = Pattern.compile(".*(?<=abc)+\\.log$"); 800 assertFalse(pat.matcher("cde.log").matches()); 801 802 } 803 804 public void _testCorrectReplacementBackreferencedJointSet() { 805 Pattern.compile("ab(a)*\\1"); 806 Pattern.compile("abc(cd)fg"); 807 Pattern.compile("aba*cd"); 808 Pattern.compile("ab(a)*+cd"); 809 Pattern.compile("ab(a)*?cd"); 810 Pattern.compile("ab(a)+cd"); 811 Pattern.compile(".*(.)\\1"); 812 Pattern.compile("ab((a)|c|d)e"); 813 Pattern.compile("abc((a(b))cd)"); 814 Pattern.compile("ab(a)++cd"); 815 Pattern.compile("ab(a)?(c)d"); 816 Pattern.compile("ab(a)?+cd"); 817 Pattern.compile("ab(a)??cd"); 818 Pattern.compile("ab(a)??cd"); 819 Pattern.compile("ab(a){1,3}?(c)d"); 820 } 821 822 public void testCompilePatternWithTerminatorMark() { 823 Pattern pat = Pattern.compile("a\u0000\u0000cd"); 824 Matcher mat = pat.matcher("a\u0000\u0000cd"); 825 assertTrue(mat.matches()); 826 } 827 828 public void testAlternations() { 829 String baseString = "|a|bc"; 830 Pattern pat = Pattern.compile(baseString); 831 Matcher mat = pat.matcher(""); 832 833 assertTrue(mat.matches()); 834 835 baseString = "a||bc"; 836 pat = Pattern.compile(baseString); 837 mat = pat.matcher(""); 838 assertTrue(mat.matches()); 839 840 baseString = "a|bc|"; 841 pat = Pattern.compile(baseString); 842 mat = pat.matcher(""); 843 assertTrue(mat.matches()); 844 845 baseString = "a|b|"; 846 pat = Pattern.compile(baseString); 847 mat = pat.matcher(""); 848 assertTrue(mat.matches()); 849 850 baseString = "a(|b|cd)e"; 851 pat = Pattern.compile(baseString); 852 mat = pat.matcher("ae"); 853 assertTrue(mat.matches()); 854 855 baseString = "a(b||cd)e"; 856 pat = Pattern.compile(baseString); 857 mat = pat.matcher("ae"); 858 assertTrue(mat.matches()); 859 860 baseString = "a(b|cd|)e"; 861 pat = Pattern.compile(baseString); 862 mat = pat.matcher("ae"); 863 assertTrue(mat.matches()); 864 865 baseString = "a(b|c|)e"; 866 pat = Pattern.compile(baseString); 867 mat = pat.matcher("ae"); 868 assertTrue(mat.matches()); 869 870 baseString = "a(|)e"; 871 pat = Pattern.compile(baseString); 872 mat = pat.matcher("ae"); 873 assertTrue(mat.matches()); 874 875 baseString = "|"; 876 pat = Pattern.compile(baseString); 877 mat = pat.matcher(""); 878 assertTrue(mat.matches()); 879 880 baseString = "a(?:|)e"; 881 pat = Pattern.compile(baseString); 882 mat = pat.matcher("ae"); 883 assertTrue(mat.matches()); 884 885 baseString = "a||||bc"; 886 pat = Pattern.compile(baseString); 887 mat = pat.matcher(""); 888 assertTrue(mat.matches()); 889 890 baseString = "(?i-is)|a"; 891 pat = Pattern.compile(baseString); 892 mat = pat.matcher("a"); 893 assertTrue(mat.matches()); 894 } 895 896 public void testMatchWithGroups() { 897 String baseString = "jwkerhjwehrkwjehrkwjhrwkjehrjwkehrjkwhrkwehrkwhrkwrhwkhrwkjehr"; 898 String pattern = ".*(..).*\\1.*"; 899 assertTrue(Pattern.compile(pattern).matcher(baseString).matches()); 900 901 baseString = "saa"; 902 pattern = ".*(.)\\1"; 903 assertTrue(Pattern.compile(pattern).matcher(baseString).matches()); 904 assertTrue(Pattern.compile(pattern).matcher(baseString).find()); 905 } 906 907 public void testSplitEmptyCharSequence() { 908 String s1 = ""; 909 String[] arr = s1.split(":"); 910 assertEquals(arr.length, 1); 911 } 912 913 public void testSplitEndsWithPattern() { 914 assertEquals(",,".split(",", 3).length, 3); 915 assertEquals(",,".split(",", 4).length, 3); 916 917 assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5); 918 assertEquals(Pattern.compile("b").split("ab", -1).length, 2); 919 } 920 921 public void testCaseInsensitiveFlag() { 922 assertTrue(Pattern.matches("(?i-:AbC)", "ABC")); 923 } 924 925 public void testEmptyGroups() { 926 Pattern pat = Pattern.compile("ab(?>)cda"); 927 Matcher mat = pat.matcher("abcda"); 928 assertTrue(mat.matches()); 929 930 pat = Pattern.compile("ab()"); 931 mat = pat.matcher("ab"); 932 assertTrue(mat.matches()); 933 934 pat = Pattern.compile("abc(?:)(..)"); 935 mat = pat.matcher("abcgf"); 936 assertTrue(mat.matches()); 937 } 938 939 public void testCompileNonCaptGroup() { 940 boolean isCompiled = false; 941 942 try { 943 Pattern.compile("(?:)", Pattern.CANON_EQ); 944 Pattern.compile("(?:)", Pattern.CANON_EQ | Pattern.DOTALL); 945 Pattern 946 .compile("(?:)", Pattern.CANON_EQ 947 | Pattern.CASE_INSENSITIVE); 948 Pattern.compile("(?:)", Pattern.CANON_EQ | Pattern.COMMENTS 949 | Pattern.UNIX_LINES); 950 isCompiled = true; 951 } catch (PatternSyntaxException e) { 952 System.out.println(e); 953 } 954 assertTrue(isCompiled); 955 } 956 957 public void testEmbeddedFlags() { 958 String baseString = "(?i)((?s)a)"; 959 String testString = "A"; 960 Pattern pat = Pattern.compile(baseString); 961 Matcher mat = pat.matcher(testString); 962 assertTrue(mat.matches()); 963 964 baseString = "(?x)(?i)(?s)(?d)a"; 965 testString = "A"; 966 pat = Pattern.compile(baseString); 967 mat = pat.matcher(testString); 968 assertTrue(mat.matches()); 969 970 baseString = "(?x)(?i)(?s)(?d)a."; 971 testString = "a\n"; 972 pat = Pattern.compile(baseString); 973 mat = pat.matcher(testString); 974 assertTrue(mat.matches()); 975 976 baseString = "abc(?x:(?i)(?s)(?d)a.)"; 977 testString = "abcA\n"; 978 pat = Pattern.compile(baseString); 979 mat = pat.matcher(testString); 980 assertTrue(mat.matches()); 981 982 baseString = "abc((?x)d)(?i)(?s)a"; 983 testString = "abcdA"; 984 pat = Pattern.compile(baseString); 985 mat = pat.matcher(testString); 986 assertTrue(mat.matches()); 987 } 988 989 public void testAltWithFlags() { 990 Pattern.compile("|(?i-xi)|()"); 991 } 992 993 public void testRestoreFlagsAfterGroup() { 994 String baseString = "abc((?x)d) a"; 995 String testString = "abcd a"; 996 Pattern pat = Pattern.compile(baseString); 997 Matcher mat = pat.matcher(testString); 998 999 assertTrue(mat.matches()); 1000 } 1001 1002 /* 1003 * Verify if the Pattern support the following character classes: 1004 * \p{javaLowerCase} \p{javaUpperCase} \p{javaWhitespace} \p{javaMirrored} 1005 */ 1006 public void testCompileCharacterClass() { 1007 // Regression for HARMONY-606, 696 1008 Pattern pattern = Pattern.compile("\\p{javaLowerCase}"); 1009 assertNotNull(pattern); 1010 1011 pattern = Pattern.compile("\\p{javaUpperCase}"); 1012 assertNotNull(pattern); 1013 1014 pattern = Pattern.compile("\\p{javaWhitespace}"); 1015 assertNotNull(pattern); 1016 1017 pattern = Pattern.compile("\\p{javaMirrored}"); 1018 assertNotNull(pattern); 1019 1020 pattern = Pattern.compile("\\p{javaDefined}"); 1021 assertNotNull(pattern); 1022 1023 pattern = Pattern.compile("\\p{javaDigit}"); 1024 assertNotNull(pattern); 1025 1026 pattern = Pattern.compile("\\p{javaIdentifierIgnorable}"); 1027 assertNotNull(pattern); 1028 1029 pattern = Pattern.compile("\\p{javaISOControl}"); 1030 assertNotNull(pattern); 1031 1032 pattern = Pattern.compile("\\p{javaJavaIdentifierPart}"); 1033 assertNotNull(pattern); 1034 1035 pattern = Pattern.compile("\\p{javaJavaIdentifierStart}"); 1036 assertNotNull(pattern); 1037 1038 pattern = Pattern.compile("\\p{javaLetter}"); 1039 assertNotNull(pattern); 1040 1041 pattern = Pattern.compile("\\p{javaLetterOrDigit}"); 1042 assertNotNull(pattern); 1043 1044 pattern = Pattern.compile("\\p{javaSpaceChar}"); 1045 assertNotNull(pattern); 1046 1047 pattern = Pattern.compile("\\p{javaTitleCase}"); 1048 assertNotNull(pattern); 1049 1050 pattern = Pattern.compile("\\p{javaUnicodeIdentifierPart}"); 1051 assertNotNull(pattern); 1052 1053 pattern = Pattern.compile("\\p{javaUnicodeIdentifierStart}"); 1054 assertNotNull(pattern); 1055 } 1056 1057 public void testCanonEqFlag() { 1058 1059 /* 1060 * for decompositions see 1061 * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt 1062 * http://www.unicode.org/reports/tr15/#Decomposition 1063 */ 1064 String baseString; 1065 String testString; 1066 Pattern pat; 1067 Matcher mat; 1068 1069 baseString = "ab(a*)\\1"; 1070 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1071 1072 baseString = "a(abcdf)d"; 1073 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1074 1075 baseString = "aabcdfd"; 1076 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1077 1078 // \u01E0 -> \u0226\u0304 ->\u0041\u0307\u0304 1079 // \u00CC -> \u0049\u0300 1080 1081 baseString = "\u01E0\u00CCcdb(ac)"; 1082 testString = "\u0226\u0304\u0049\u0300cdbac"; 1083 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1084 mat = pat.matcher(testString); 1085 assertTrue(mat.matches()); 1086 1087 baseString = "\u01E0cdb(a\u00CCc)"; 1088 testString = "\u0041\u0307\u0304cdba\u0049\u0300c"; 1089 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1090 mat = pat.matcher(testString); 1091 assertTrue(mat.matches()); 1092 1093 baseString = "a\u00CC"; 1094 testString = "a\u0049\u0300"; 1095 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1096 mat = pat.matcher(testString); 1097 assertTrue(mat.matches()); 1098 1099 baseString = "\u0226\u0304cdb(ac\u0049\u0300)"; 1100 testString = "\u01E0cdbac\u00CC"; 1101 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1102 mat = pat.matcher(testString); 1103 assertTrue(mat.matches()); 1104 1105 baseString = "cdb(?:\u0041\u0307\u0304\u00CC)"; 1106 testString = "cdb\u0226\u0304\u0049\u0300"; 1107 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1108 mat = pat.matcher(testString); 1109 assertTrue(mat.matches()); 1110 1111 baseString = "\u01E0[a-c]\u0049\u0300cdb(ac)"; 1112 testString = "\u01E0b\u00CCcdbac"; 1113 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1114 mat = pat.matcher(testString); 1115 assertTrue(mat.matches()); 1116 1117 baseString = "\u01E0|\u00CCcdb(ac)"; 1118 testString = "\u0041\u0307\u0304"; 1119 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1120 mat = pat.matcher(testString); 1121 assertTrue(mat.matches()); 1122 1123 baseString = "\u00CC?cdb(ac)*(\u01E0)*[a-c]"; 1124 testString = "cdb\u0041\u0307\u0304b"; 1125 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1126 mat = pat.matcher(testString); 1127 assertTrue(mat.matches()); 1128 1129 baseString = "a\u0300"; 1130 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1131 mat = pat.matcher("a\u00E0a"); 1132 assertTrue(mat.find()); 1133 1134 baseString = "\u7B20\uF9F8abc"; 1135 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1136 mat = pat.matcher("\uF9F8\uF9F8abc"); 1137 assertTrue(mat.matches()); 1138 1139 // \u01F9 -> \u006E\u0300 1140 // \u00C3 -> \u0041\u0303 1141 1142 baseString = "cdb(?:\u00C3\u006E\u0300)"; 1143 testString = "cdb\u0041\u0303\u01F9"; 1144 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1145 mat = pat.matcher(testString); 1146 assertTrue(mat.matches()); 1147 1148 // \u014C -> \u004F\u0304 1149 // \u0163 -> \u0074\u0327 1150 1151 baseString = "cdb(?:\u0163\u004F\u0304)"; 1152 testString = "cdb\u0074\u0327\u014C"; 1153 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1154 mat = pat.matcher(testString); 1155 assertTrue(mat.matches()); 1156 1157 // \u00E1->a\u0301 1158 // canonical ordering takes place \u0301\u0327 -> \u0327\u0301 1159 1160 baseString = "c\u0327\u0301"; 1161 testString = "c\u0301\u0327"; 1162 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1163 mat = pat.matcher(testString); 1164 assertTrue(mat.matches()); 1165 1166 /* 1167 * Hangul decompositions 1168 */ 1169 // \uD4DB->\u1111\u1171\u11B6 1170 // \uD21E->\u1110\u116D\u11B5 1171 // \uD264->\u1110\u1170 1172 // not Hangul:\u0453->\u0433\u0301 1173 baseString = "a\uD4DB\u1111\u1171\u11B6\uD264"; 1174 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1175 1176 baseString = "\u0453c\uD4DB"; 1177 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1178 1179 baseString = "a\u1110\u116D\u11B5b\uD21Ebc"; 1180 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1181 1182 baseString = "\uD4DB\uD21E\u1110\u1170cdb(ac)"; 1183 testString = "\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac"; 1184 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1185 mat = pat.matcher(testString); 1186 assertTrue(mat.matches()); 1187 1188 baseString = "\uD4DB\uD264cdb(a\uD21Ec)"; 1189 testString = "\u1111\u1171\u11B6\u1110\u1170cdba\u1110\u116D\u11B5c"; 1190 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1191 mat = pat.matcher(testString); 1192 assertTrue(mat.matches()); 1193 1194 baseString = "a\uD4DB"; 1195 testString = "a\u1111\u1171\u11B6"; 1196 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1197 mat = pat.matcher(testString); 1198 assertTrue(mat.matches()); 1199 1200 baseString = "a\uD21E"; 1201 testString = "a\u1110\u116D\u11B5"; 1202 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1203 mat = pat.matcher(testString); 1204 assertTrue(mat.matches()); 1205 1206 baseString = "\u1111\u1171\u11B6cdb(ac\u1110\u116D\u11B5)"; 1207 testString = "\uD4DBcdbac\uD21E"; 1208 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1209 mat = pat.matcher(testString); 1210 assertTrue(mat.matches()); 1211 1212 baseString = "cdb(?:\u1111\u1171\u11B6\uD21E)"; 1213 testString = "cdb\uD4DB\u1110\u116D\u11B5"; 1214 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1215 mat = pat.matcher(testString); 1216 assertTrue(mat.matches()); 1217 1218 baseString = "\uD4DB[a-c]\u1110\u116D\u11B5cdb(ac)"; 1219 testString = "\uD4DBb\uD21Ecdbac"; 1220 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1221 mat = pat.matcher(testString); 1222 assertTrue(mat.matches()); 1223 1224 baseString = "\uD4DB|\u00CCcdb(ac)"; 1225 testString = "\u1111\u1171\u11B6"; 1226 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1227 mat = pat.matcher(testString); 1228 assertTrue(mat.matches()); 1229 1230 baseString = "\uD4DB|\u00CCcdb(ac)"; 1231 testString = "\u1111\u1171"; 1232 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1233 mat = pat.matcher(testString); 1234 assertFalse(mat.matches()); 1235 1236 baseString = "\u00CC?cdb(ac)*(\uD4DB)*[a-c]"; 1237 testString = "cdb\u1111\u1171\u11B6b"; 1238 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1239 mat = pat.matcher(testString); 1240 assertTrue(mat.matches()); 1241 1242 baseString = "\uD4DB"; 1243 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1244 mat = pat.matcher("a\u1111\u1171\u11B6a"); 1245 assertTrue(mat.find()); 1246 1247 baseString = "\u1111"; 1248 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1249 mat = pat.matcher("bcda\uD4DBr"); 1250 assertFalse(mat.find()); 1251 } 1252 1253 public void testIndexesCanonicalEq() { 1254 String baseString; 1255 String testString; 1256 Pattern pat; 1257 Matcher mat; 1258 1259 baseString = "\uD4DB"; 1260 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1261 mat = pat.matcher("bcda\u1111\u1171\u11B6awr"); 1262 assertTrue(mat.find()); 1263 assertEquals(mat.start(), 4); 1264 assertEquals(mat.end(), 7); 1265 1266 baseString = "\uD4DB\u1111\u1171\u11B6"; 1267 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1268 mat = pat.matcher("bcda\u1111\u1171\u11B6\uD4DBawr"); 1269 assertTrue(mat.find()); 1270 assertEquals(mat.start(), 4); 1271 assertEquals(mat.end(), 8); 1272 1273 baseString = "\uD4DB\uD21E\u1110\u1170"; 1274 testString = "abcabc\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac"; 1275 pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1276 mat = pat.matcher(testString); 1277 assertTrue(mat.find()); 1278 assertEquals(mat.start(), 6); 1279 assertEquals(mat.end(), 13); 1280 } 1281 1282 public void testCanonEqFlagWithSupplementaryCharacters() { 1283 1284 /* 1285 * \u1D1BF->\u1D1BB\u1D16F->\u1D1B9\u1D165\u1D16F in UTF32 1286 * \uD834\uDDBF->\uD834\uDDBB\uD834\uDD6F 1287 * ->\uD834\uDDB9\uD834\uDD65\uD834\uDD6F in UTF16 1288 */ 1289 String patString = "abc\uD834\uDDBFef"; 1290 String testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; 1291 Pattern pat = Pattern.compile(patString, Pattern.CANON_EQ); 1292 Matcher mat = pat.matcher(testString); 1293 assertTrue(mat.matches()); 1294 1295 testString = "abc\uD834\uDDBB\uD834\uDD6Fef"; 1296 mat = pat.matcher(testString); 1297 assertTrue(mat.matches()); 1298 1299 patString = "abc\uD834\uDDBB\uD834\uDD6Fef"; 1300 testString = "abc\uD834\uDDBFef"; 1301 pat = Pattern.compile(patString, Pattern.CANON_EQ); 1302 mat = pat.matcher(testString); 1303 assertTrue(mat.matches()); 1304 1305 testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; 1306 mat = pat.matcher(testString); 1307 assertTrue(mat.matches()); 1308 1309 patString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; 1310 testString = "abc\uD834\uDDBFef"; 1311 pat = Pattern.compile(patString, Pattern.CANON_EQ); 1312 mat = pat.matcher(testString); 1313 assertTrue(mat.matches()); 1314 1315 testString = "abc\uD834\uDDBB\uD834\uDD6Fef"; 1316 mat = pat.matcher(testString); 1317 assertTrue(mat.matches()); 1318 1319 /* 1320 * testSupplementary characters with no decomposition 1321 */ 1322 patString = "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef"; 1323 testString = "a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef"; 1324 pat = Pattern.compile(patString, Pattern.CANON_EQ); 1325 mat = pat.matcher(testString); 1326 assertTrue(mat.matches()); 1327 } 1328 1329 public void testRangesWithSurrogatesSupplementary() { 1330 String patString = "[abc\uD8D2]"; 1331 String testString = "\uD8D2"; 1332 Pattern pat = Pattern.compile(patString); 1333 Matcher mat = pat.matcher(testString); 1334 assertTrue(mat.matches()); 1335 1336 testString = "a"; 1337 mat = pat.matcher(testString); 1338 assertTrue(mat.matches()); 1339 1340 testString = "ef\uD8D2\uDD71gh"; 1341 mat = pat.matcher(testString); 1342 assertFalse(mat.find()); 1343 1344 testString = "ef\uD8D2gh"; 1345 mat = pat.matcher(testString); 1346 assertTrue(mat.find()); 1347 1348 patString = "[abc\uD8D3&&[c\uD8D3]]"; 1349 testString = "c"; 1350 pat = Pattern.compile(patString); 1351 mat = pat.matcher(testString); 1352 assertTrue(mat.matches()); 1353 1354 testString = "a"; 1355 mat = pat.matcher(testString); 1356 assertFalse(mat.matches()); 1357 1358 testString = "ef\uD8D3\uDD71gh"; 1359 mat = pat.matcher(testString); 1360 assertFalse(mat.find()); 1361 1362 testString = "ef\uD8D3gh"; 1363 mat = pat.matcher(testString); 1364 assertTrue(mat.find()); 1365 1366 patString = "[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]"; 1367 testString = "c"; 1368 pat = Pattern.compile(patString); 1369 mat = pat.matcher(testString); 1370 assertTrue(mat.matches()); 1371 1372 testString = "\uDBEE\uDF0C"; 1373 mat = pat.matcher(testString); 1374 assertTrue(mat.matches()); 1375 1376 testString = "ef\uD8D3\uDD71gh"; 1377 mat = pat.matcher(testString); 1378 assertFalse(mat.find()); 1379 1380 testString = "ef\uD8D3gh"; 1381 mat = pat.matcher(testString); 1382 assertTrue(mat.find()); 1383 1384 patString = "[abc\uDBFC]\uDDC2cd"; 1385 testString = "\uDBFC\uDDC2cd"; 1386 pat = Pattern.compile(patString); 1387 mat = pat.matcher(testString); 1388 assertFalse(mat.matches()); 1389 1390 testString = "a\uDDC2cd"; 1391 mat = pat.matcher(testString); 1392 assertTrue(mat.matches()); 1393 } 1394 1395 public void testSequencesWithSurrogatesSupplementary() { 1396 String patString = "abcd\uD8D3"; 1397 String testString = "abcd\uD8D3\uDFFC"; 1398 Pattern pat = Pattern.compile(patString); 1399 Matcher mat = pat.matcher(testString); 1400 assertFalse(mat.find()); 1401 1402 testString = "abcd\uD8D3abc"; 1403 mat = pat.matcher(testString); 1404 assertTrue(mat.find()); 1405 1406 patString = "ab\uDBEFcd"; 1407 testString = "ab\uDBEFcd"; 1408 pat = Pattern.compile(patString); 1409 mat = pat.matcher(testString); 1410 assertTrue(mat.matches()); 1411 1412 patString = "\uDFFCabcd"; 1413 testString = "\uD8D3\uDFFCabcd"; 1414 pat = Pattern.compile(patString); 1415 mat = pat.matcher(testString); 1416 assertFalse(mat.find()); 1417 1418 testString = "abc\uDFFCabcdecd"; 1419 mat = pat.matcher(testString); 1420 assertTrue(mat.find()); 1421 1422 patString = "\uD8D3\uDFFCabcd"; 1423 testString = "abc\uD8D3\uD8D3\uDFFCabcd"; 1424 pat = Pattern.compile(patString); 1425 mat = pat.matcher(testString); 1426 assertTrue(mat.find()); 1427 } 1428 1429 public void testPredefinedClassesWithSurrogatesSupplementary() { 1430 String patString = "[123\\D]"; 1431 String testString = "a"; 1432 Pattern pat = Pattern.compile(patString); 1433 Matcher mat = pat.matcher(testString); 1434 assertTrue(mat.find()); 1435 1436 testString = "5"; 1437 mat = pat.matcher(testString); 1438 assertFalse(mat.find()); 1439 1440 testString = "3"; 1441 mat = pat.matcher(testString); 1442 assertTrue(mat.find()); 1443 1444 // low surrogate 1445 testString = "\uDFC4"; 1446 mat = pat.matcher(testString); 1447 assertTrue(mat.find()); 1448 1449 // high surrogate 1450 testString = "\uDADA"; 1451 mat = pat.matcher(testString); 1452 assertTrue(mat.find()); 1453 1454 testString = "\uDADA\uDFC4"; 1455 mat = pat.matcher(testString); 1456 assertTrue(mat.find()); 1457 1458 patString = "[123[^\\p{javaDigit}]]"; 1459 testString = "a"; 1460 pat = Pattern.compile(patString); 1461 mat = pat.matcher(testString); 1462 assertTrue(mat.find()); 1463 1464 testString = "5"; 1465 mat = pat.matcher(testString); 1466 assertFalse(mat.find()); 1467 1468 testString = "3"; 1469 mat = pat.matcher(testString); 1470 assertTrue(mat.find()); 1471 1472 // low surrogate 1473 testString = "\uDFC4"; 1474 mat = pat.matcher(testString); 1475 assertTrue(mat.find()); 1476 1477 // high surrogate 1478 testString = "\uDADA"; 1479 mat = pat.matcher(testString); 1480 assertTrue(mat.find()); 1481 1482 testString = "\uDADA\uDFC4"; 1483 mat = pat.matcher(testString); 1484 assertTrue(mat.find()); 1485 1486 // surrogate characters 1487 patString = "\\p{Cs}"; 1488 testString = "\uD916\uDE27"; 1489 pat = Pattern.compile(patString); 1490 mat = pat.matcher(testString); 1491 1492 /* 1493 * see http://www.unicode.org/reports/tr18/#Supplementary_Characters we 1494 * have to treat text as code points not code units. \\p{Cs} matches any 1495 * surrogate character but here testString is a one code point 1496 * consisting of two code units (two surrogate characters) so we find 1497 * nothing 1498 */ 1499 assertFalse(mat.find()); 1500 1501 // swap low and high surrogates 1502 testString = "\uDE27\uD916"; 1503 mat = pat.matcher(testString); 1504 assertTrue(mat.find()); 1505 1506 patString = "[\uD916\uDE271\uD91623&&[^\\p{Cs}]]"; 1507 testString = "1"; 1508 pat = Pattern.compile(patString); 1509 mat = pat.matcher(testString); 1510 assertTrue(mat.find()); 1511 1512 testString = "\uD916"; 1513 pat = Pattern.compile(patString); 1514 mat = pat.matcher(testString); 1515 assertFalse(mat.find()); 1516 1517 testString = "\uD916\uDE27"; 1518 pat = Pattern.compile(patString); 1519 mat = pat.matcher(testString); 1520 assertTrue(mat.find()); 1521 1522 // \uD9A0\uDE8E=\u7828E 1523 // \u78281=\uD9A0\uDE81 1524 patString = "[a-\uD9A0\uDE8E]"; 1525 testString = "\uD9A0\uDE81"; 1526 pat = Pattern.compile(patString); 1527 mat = pat.matcher(testString); 1528 assertTrue(mat.matches()); 1529 } 1530 1531 public void testDotConstructionWithSurrogatesSupplementary() { 1532 String patString = "."; 1533 String testString = "\uD9A0\uDE81"; 1534 Pattern pat = Pattern.compile(patString); 1535 Matcher mat = pat.matcher(testString); 1536 assertTrue(mat.matches()); 1537 1538 testString = "\uDE81"; 1539 mat = pat.matcher(testString); 1540 assertTrue(mat.matches()); 1541 1542 testString = "\uD9A0"; 1543 mat = pat.matcher(testString); 1544 assertTrue(mat.matches()); 1545 1546 testString = "\n"; 1547 mat = pat.matcher(testString); 1548 assertFalse(mat.matches()); 1549 1550 patString = ".*\uDE81"; 1551 testString = "\uD9A0\uDE81\uD9A0\uDE81\uD9A0\uDE81"; 1552 pat = Pattern.compile(patString); 1553 mat = pat.matcher(testString); 1554 assertFalse(mat.matches()); 1555 1556 testString = "\uD9A0\uDE81\uD9A0\uDE81\uDE81"; 1557 mat = pat.matcher(testString); 1558 assertTrue(mat.matches()); 1559 1560 patString = ".*"; 1561 testString = "\uD9A0\uDE81\n\uD9A0\uDE81\uD9A0\n\uDE81"; 1562 pat = Pattern.compile(patString, Pattern.DOTALL); 1563 mat = pat.matcher(testString); 1564 assertTrue(mat.matches()); 1565 } 1566 1567 public void testQuantifiersWithSurrogatesSupplementary() { 1568 String patString = "\uD9A0\uDE81*abc"; 1569 String testString = "\uD9A0\uDE81\uD9A0\uDE81abc"; 1570 Pattern pat = Pattern.compile(patString); 1571 Matcher mat = pat.matcher(testString); 1572 assertTrue(mat.matches()); 1573 1574 testString = "abc"; 1575 mat = pat.matcher(testString); 1576 assertTrue(mat.matches()); 1577 } 1578 1579 public void testAlternationsWithSurrogatesSupplementary() { 1580 String patString = "\uDE81|\uD9A0\uDE81|\uD9A0"; 1581 String testString = "\uD9A0"; 1582 Pattern pat = Pattern.compile(patString); 1583 Matcher mat = pat.matcher(testString); 1584 assertTrue(mat.matches()); 1585 1586 testString = "\uDE81"; 1587 mat = pat.matcher(testString); 1588 assertTrue(mat.matches()); 1589 1590 testString = "\uD9A0\uDE81"; 1591 mat = pat.matcher(testString); 1592 assertTrue(mat.matches()); 1593 1594 testString = "\uDE81\uD9A0"; 1595 mat = pat.matcher(testString); 1596 assertFalse(mat.matches()); 1597 } 1598 1599 public void testGroupsWithSurrogatesSupplementary() { 1600 1601 // this pattern matches nothing 1602 String patString = "(\uD9A0)\uDE81"; 1603 String testString = "\uD9A0\uDE81"; 1604 Pattern pat = Pattern.compile(patString); 1605 Matcher mat = pat.matcher(testString); 1606 assertFalse(mat.matches()); 1607 1608 patString = "(\uD9A0)"; 1609 testString = "\uD9A0\uDE81"; 1610 pat = Pattern.compile(patString, Pattern.DOTALL); 1611 mat = pat.matcher(testString); 1612 assertFalse(mat.find()); 1613 } 1614 1615 /* 1616 * Regression test for HARMONY-688 1617 */ 1618 public void testUnicodeCategoryWithSurrogatesSupplementary() { 1619 Pattern p = Pattern.compile("\\p{javaLowerCase}"); 1620 Matcher matcher = p.matcher("\uD801\uDC28"); 1621 assertTrue(matcher.find()); 1622 } 1623 1624} 1625