1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package org.apache.harmony.regex.tests.java.util.regex; 19 20import java.io.Serializable; 21import java.util.regex.Matcher; 22import java.util.regex.Pattern; 23import java.util.regex.PatternSyntaxException; 24 25import junit.framework.TestCase; 26 27import org.apache.harmony.testframework.serialization.SerializationTest; 28import org.apache.harmony.testframework.serialization.SerializationTest.SerializableAssert; 29 30public class PatternTest extends TestCase { 31 String[] testPatterns = { 32 "(a|b)*abb", 33 "(1*2*3*4*)*567", 34 "(a|b|c|d)*aab", 35 "(1|2|3|4|5|6|7|8|9|0)(1|2|3|4|5|6|7|8|9|0)*", 36 "(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)*", 37 "(a|b)*(a|b)*A(a|b)*lice.*", 38 "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)(a|b|c|d|e|f|g|h|" 39 + "i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)*(1|2|3|4|5|6|7|8|9|0)*|while|for|struct|if|do", 40// BEGIN android-changed 41// We don't have canonical equivalence. 42// "x(?c)y", "x(?cc)y" 43// "x(?:c)y" 44// END android-changed 45 46 }; 47 48 String[] testPatternsAlt = { 49 /* 50 * According to JavaDoc 2 and 3 oct digit sequences like \\o70\\o347 51 * should be OK, but test is failed for them 52 */ 53 "[ab]\\b\\\\o5\\xF9\\u1E7B\\t\\n\\f\\r\\a\\e[yz]", 54 "^\\p{Lower}*\\p{Upper}*\\p{ASCII}?\\p{Alpha}?\\p{Digit}*\\p{Alnum}\\p{Punct}\\p{Graph}\\p{Print}\\p{Blank}\\p{Cntrl}\\p{XDigit}\\p{Space}", 55 "$\\p{javaLowerCase}\\p{javaUpperCase}\\p{javaWhitespace}\\p{javaMirrored}", 56 "\\p{InGreek}\\p{Lu}\\p{Sc}\\P{InGreek}[\\p{L}&&[^\\p{Lu}]]" }; 57 58 String[] wrongTestPatterns = { "\\o9A", "\\p{Lawer}", "\\xG0" }; 59 60 final static int[] flagsSet = { Pattern.CASE_INSENSITIVE, 61 Pattern.MULTILINE, Pattern.DOTALL, Pattern.UNICODE_CASE 62 /* , Pattern.CANON_EQ */ }; 63 64 /* 65 * Based on RI implenetation documents. Need to check this set regarding 66 * actual implementation. 67 */ 68 final static int[] wrongFlagsSet = { 256, 512, 1024 }; 69 70 final static int DEFAULT_FLAGS = 0; 71 72 public void testMatcher() { 73 // some very simple test 74 Pattern p = Pattern.compile("a"); 75 assertNotNull(p.matcher("bcde")); 76 assertNotSame(p.matcher("a"), p.matcher("a")); 77 } 78 79 public void testSplitCharSequenceint() { 80 // splitting CharSequence which ends with pattern 81 // bug6193 82 assertEquals(",,".split(",", 3).length, 3); 83 assertEquals(",,".split(",", 4).length, 3); 84 // bug6193 85 // bug5391 86 assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5); 87 assertEquals(Pattern.compile("b").split("ab", -1).length, 2); 88 // bug5391 89 String s[]; 90 Pattern pat = Pattern.compile("x"); 91 s = pat.split("zxx:zzz:zxx", 10); 92 assertEquals(s.length, 5); 93 s = pat.split("zxx:zzz:zxx", 3); 94 assertEquals(s.length, 3); 95 s = pat.split("zxx:zzz:zxx", -1); 96 assertEquals(s.length, 5); 97 s = pat.split("zxx:zzz:zxx", 0); 98 assertEquals(s.length, 3); 99 // other splitting 100 // negative limit 101 pat = Pattern.compile("b"); 102 s = pat.split("abccbadfebb", -1); 103 assertEquals(s.length, 5); 104 s = pat.split("", -1); 105 assertEquals(s.length, 1); 106 pat = Pattern.compile(""); 107 s = pat.split("", -1); 108 assertEquals(s.length, 1); 109 s = pat.split("abccbadfe", -1); 110 assertEquals(s.length, 11); 111 // zero limit 112 pat = Pattern.compile("b"); 113 s = pat.split("abccbadfebb", 0); 114 assertEquals(s.length, 3); 115 s = pat.split("", 0); 116 assertEquals(s.length, 1); 117 pat = Pattern.compile(""); 118 s = pat.split("", 0); 119 assertEquals(s.length, 1); 120 s = pat.split("abccbadfe", 0); 121 assertEquals(s.length, 10); 122 // positive limit 123 pat = Pattern.compile("b"); 124 s = pat.split("abccbadfebb", 12); 125 assertEquals(s.length, 5); 126 s = pat.split("", 6); 127 assertEquals(s.length, 1); 128 pat = Pattern.compile(""); 129 s = pat.split("", 11); 130 assertEquals(s.length, 1); 131 s = pat.split("abccbadfe", 15); 132 assertEquals(s.length, 11); 133 134 pat = Pattern.compile("b"); 135 s = pat.split("abccbadfebb", 5); 136 assertEquals(s.length, 5); 137 s = pat.split("", 1); 138 assertEquals(s.length, 1); 139 pat = Pattern.compile(""); 140 s = pat.split("", 1); 141 assertEquals(s.length, 1); 142 s = pat.split("abccbadfe", 11); 143 assertEquals(s.length, 11); 144 145 pat = Pattern.compile("b"); 146 s = pat.split("abccbadfebb", 3); 147 assertEquals(s.length, 3); 148 pat = Pattern.compile(""); 149 s = pat.split("abccbadfe", 5); 150 assertEquals(s.length, 5); 151 } 152 153 public void testSplitCharSequence() { 154 String s[]; 155 Pattern pat = Pattern.compile("b"); 156 s = pat.split("abccbadfebb"); 157 assertEquals(s.length, 3); 158 s = pat.split(""); 159 assertEquals(s.length, 1); 160 pat = Pattern.compile(""); 161 s = pat.split(""); 162 assertEquals(s.length, 1); 163 s = pat.split("abccbadfe"); 164 assertEquals(s.length, 10); 165 // bug6544 166 String s1 = ""; 167 String[] arr = s1.split(":"); 168 assertEquals(arr.length, 1); 169 // bug6544 170 } 171 172 public void testPattern() { 173 /* Positive assertion test. */ 174 for (String aPattern : testPatterns) { 175 Pattern p = Pattern.compile(aPattern); 176 try { 177 assertTrue(p.pattern().equals(aPattern)); 178 } catch (Exception e) { 179 fail("Unexpected exception: " + e); 180 } 181 } 182 } 183 184 public void testCompile() { 185 /* Positive assertion test. */ 186 for (String aPattern : testPatterns) { 187 try { 188 Pattern p = Pattern.compile(aPattern); 189 } catch (Exception e) { 190 fail("Unexpected exception: " + e); 191 } 192 } 193 194 /* Positive assertion test with alternative templates. */ 195 for (String aPattern : testPatternsAlt) { 196 try { 197 Pattern p = Pattern.compile(aPattern); 198 } catch (Exception e) { 199 fail("Unexpected exception: " + e); 200 } 201 } 202 203 /* Negative assertion test. */ 204 for (String aPattern : wrongTestPatterns) { 205 try { 206 Pattern p = Pattern.compile(aPattern); 207 fail("PatternSyntaxException is expected"); 208 } catch (PatternSyntaxException pse) { 209 /* OKAY */ 210 } catch (Exception e) { 211 fail("Unexpected exception: " + e); 212 } 213 } 214 } 215 216 public void testFlags() { 217 String baseString; 218 String testString; 219 Pattern pat; 220 Matcher mat; 221 222 baseString = "((?i)|b)a"; 223 testString = "A"; 224 pat = Pattern.compile(baseString); 225 mat = pat.matcher(testString); 226 assertFalse(mat.matches()); 227 228 baseString = "(?i)a|b"; 229 testString = "A"; 230 pat = Pattern.compile(baseString); 231 mat = pat.matcher(testString); 232 assertTrue(mat.matches()); 233 234 baseString = "(?i)a|b"; 235 testString = "B"; 236 pat = Pattern.compile(baseString); 237 mat = pat.matcher(testString); 238 assertTrue(mat.matches()); 239 240 baseString = "c|(?i)a|b"; 241 testString = "B"; 242 pat = Pattern.compile(baseString); 243 mat = pat.matcher(testString); 244 assertTrue(mat.matches()); 245 246 baseString = "(?i)a|(?s)b"; 247 testString = "B"; 248 pat = Pattern.compile(baseString); 249 mat = pat.matcher(testString); 250 assertTrue(mat.matches()); 251 252 baseString = "(?i)a|(?-i)b"; 253 testString = "B"; 254 pat = Pattern.compile(baseString); 255 mat = pat.matcher(testString); 256 assertFalse(mat.matches()); 257 258 baseString = "(?i)a|(?-i)c|b"; 259 testString = "B"; 260 pat = Pattern.compile(baseString); 261 mat = pat.matcher(testString); 262 assertFalse(mat.matches()); 263 264 baseString = "(?i)a|(?-i)c|(?i)b"; 265 testString = "B"; 266 pat = Pattern.compile(baseString); 267 mat = pat.matcher(testString); 268 assertTrue(mat.matches()); 269 270 baseString = "(?i)a|(?-i)b"; 271 testString = "A"; 272 pat = Pattern.compile(baseString); 273 mat = pat.matcher(testString); 274 assertTrue(mat.matches()); 275 276 baseString = "((?i))a"; 277 testString = "A"; 278 pat = Pattern.compile(baseString); 279 mat = pat.matcher(testString); 280 assertFalse(mat.matches()); 281 282 baseString = "|(?i)|a"; 283 testString = "A"; 284 pat = Pattern.compile(baseString); 285 mat = pat.matcher(testString); 286 assertTrue(mat.matches()); 287 288 baseString = "(?i)((?s)a.)"; 289 testString = "A\n"; 290 pat = Pattern.compile(baseString); 291 mat = pat.matcher(testString); 292 assertTrue(mat.matches()); 293 294 baseString = "(?i)((?-i)a)"; 295 testString = "A"; 296 pat = Pattern.compile(baseString); 297 mat = pat.matcher(testString); 298 assertFalse(mat.matches()); 299 300 baseString = "(?i)(?s:a.)"; 301 testString = "A\n"; 302 pat = Pattern.compile(baseString); 303 mat = pat.matcher(testString); 304 assertTrue(mat.matches()); 305 306 baseString = "(?i)fgh(?s:aa)"; 307 testString = "fghAA"; 308 pat = Pattern.compile(baseString); 309 mat = pat.matcher(testString); 310 assertTrue(mat.matches()); 311 312 baseString = "(?i)((?-i))a"; 313 testString = "A"; 314 pat = Pattern.compile(baseString); 315 mat = pat.matcher(testString); 316 assertTrue(mat.matches()); 317 318 baseString = "abc(?i)d"; 319 testString = "ABCD"; 320 pat = Pattern.compile(baseString); 321 mat = pat.matcher(testString); 322 assertFalse(mat.matches()); 323 324 testString = "abcD"; 325 mat = pat.matcher(testString); 326 assertTrue(mat.matches()); 327 328 baseString = "a(?i)a(?-i)a(?i)a(?-i)a"; 329 testString = "aAaAa"; 330 pat = Pattern.compile(baseString); 331 mat = pat.matcher(testString); 332 assertTrue(mat.matches()); 333 334 testString = "aAAAa"; 335 mat = pat.matcher(testString); 336 assertFalse(mat.matches()); 337 } 338 339// BEGIN android-removed 340// The flags() method should only return those flags that were explicitly 341// passed during the compilation. The JDK also accepts the ones implicitly 342// contained in the pattern, but ICU doesn't do this. 343// 344// public void testFlagsMethod() { 345// String baseString; 346// Pattern pat; 347// 348// /* 349// * These tests are for compatibility with RI only. Logically we have to 350// * return only flags specified during the compilation. For example 351// * pat.flags() == 0 when we compile Pattern pat = 352// * Pattern.compile("(?i)abc(?-i)"); but the whole expression is compiled 353// * in a case insensitive manner. So there is little sense to do calls to 354// * flags() now. 355// */ 356// baseString = "(?-i)"; 357// pat = Pattern.compile(baseString); 358// 359// baseString = "(?idmsux)abc(?-i)vg(?-dmu)"; 360// pat = Pattern.compile(baseString); 361// assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS); 362// 363// baseString = "(?idmsux)abc|(?-i)vg|(?-dmu)"; 364// pat = Pattern.compile(baseString); 365// assertEquals(pat.flags(), Pattern.DOTALL | Pattern.COMMENTS); 366// 367// baseString = "(?is)a((?x)b.)"; 368// pat = Pattern.compile(baseString); 369// assertEquals(pat.flags(), Pattern.DOTALL | Pattern.CASE_INSENSITIVE); 370// 371// baseString = "(?i)a((?-i))"; 372// pat = Pattern.compile(baseString); 373// assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE); 374// 375// baseString = "((?i)a)"; 376// pat = Pattern.compile(baseString); 377// assertEquals(pat.flags(), 0); 378// 379// pat = Pattern.compile("(?is)abc"); 380// assertEquals(pat.flags(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL); 381// } 382//END android-removed 383 384 /* 385 * Check default flags when they are not specified in pattern. Based on RI 386 * since could not find that info 387 */ 388 public void testFlagsCompileDefault() { 389 for (String pat : testPatternsAlt) { 390 try { 391 Pattern p = Pattern.compile(pat); 392 assertEquals(p.flags(), DEFAULT_FLAGS); 393 } catch (Exception e) { 394 fail("Unexpected exception: " + e); 395 } 396 } 397 } 398 399 /* 400 * Check that flags specified during compile are set properly This is a 401 * simple implementation that does not use flags combinations. Need to 402 * improve. 403 */ 404 public void testFlagsCompileValid() { 405 for (String pat : testPatternsAlt) { 406 for (int flags : flagsSet) { 407 try { 408 Pattern p = Pattern.compile(pat, flags); 409 assertEquals(p.flags(), flags); 410 } catch (Exception e) { 411 fail("Unexpected exception: " + e); 412 } 413 } 414 } 415 } 416 417 public void testCompileStringint() { 418 /* 419 * these tests are needed to verify that appropriate exceptions are 420 * thrown 421 */ 422 String pattern = "b)a"; 423 try { 424 Pattern.compile(pattern); 425 fail("Expected a PatternSyntaxException when compiling pattern: " 426 + pattern); 427 } catch (PatternSyntaxException e) { 428 // pass 429 } 430 pattern = "bcde)a"; 431 try { 432 Pattern.compile(pattern); 433 fail("Expected a PatternSyntaxException when compiling pattern: " 434 + pattern); 435 } catch (PatternSyntaxException e) { 436 // pass 437 } 438 pattern = "bbg())a"; 439 try { 440 Pattern pat = Pattern.compile(pattern); 441 fail("Expected a PatternSyntaxException when compiling pattern: " 442 + pattern); 443 } catch (PatternSyntaxException e) { 444 // pass 445 } 446 447 pattern = "cdb(?i))a"; 448 try { 449 Pattern pat = Pattern.compile(pattern); 450 fail("Expected a PatternSyntaxException when compiling pattern: " 451 + pattern); 452 } catch (PatternSyntaxException e) { 453 // pass 454 } 455 456 /* 457 * This pattern should compile - HARMONY-2127 458 */ 459// pattern = "x(?c)y"; 460// Pattern.compile(pattern); 461 462 /* 463 * this pattern doesn't match any string, but should be compiled anyway 464 */ 465 pattern = "(b\\1)a"; 466 Pattern.compile(pattern); 467 } 468 469 /* 470 * Class under test for Pattern compile(String) 471 */ 472 public void testQuantCompileNeg() { 473 String[] patterns = { "5{,2}", "{5asd", "{hgdhg", "{5,hjkh", "{,5hdsh", 474 "{5,3shdfkjh}" }; 475 for (String element : patterns) { 476 try { 477 Pattern.compile(element); 478 fail("PatternSyntaxException was expected, but compilation succeeds"); 479 } catch (PatternSyntaxException pse) { 480 continue; 481 } 482 } 483 // Regression for HARMONY-1365 484// BEGIN android-changed 485// Original regex contained some illegal stuff. Changed it slightly, 486// while maintaining the wicked character of this "mother of all 487// regexes". 488// String pattern = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\G*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\B*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]*+)|(?x-xd:^{5}+)()"; 489 String pattern = "(?![^\\<C\\f\\0146\\0270\\}&&[|\\02-\\x3E\\}|X-\\|]]{7,}+)[|\\\\\\x98\\<\\?\\u4FCFr\\,\\0025\\}\\004|\\0025-\\052\061]|(?<![|\\01-\\u829E])|(?<!\\p{Alpha})|^|(?-s:[^\\x15\\\\\\x24F\\a\\,\\a\\u97D8[\\x38\\a[\\0224-\\0306[^\\0020-\\u6A57]]]]??)(?uxix:[^|\\{\\[\\0367\\t\\e\\x8C\\{\\[\\074c\\]V[|b\\fu\\r\\0175\\<\\07f\\066s[^D-\\x5D]]])(?xx:^{5,}+)(?uuu)(?=^\\D)|(?!\\G)(?>\\.*?)(?![^|\\]\\070\\ne\\{\\t\\[\\053\\?\\\\\\x51\\a\\075\\0023-\\[&&[|\\022-\\xEA\\00-\\u41C2&&[^|a-\\xCC&&[^\\037\\uECB3\\u3D9A\\x31\\|\\<b\\0206\\uF2EC\\01m\\,\\ak\\a\\03&&\\p{Punct}]]]])(?-dxs:[|\\06-\\07|\\e-\\x63&&[|Tp\\u18A3\\00\\|\\xE4\\05\\061\\015\\0116C|\\r\\{\\}\\006\\xEA\\0367\\xC4\\01\\0042\\0267\\xBB\\01T\\}\\0100\\?[|\\[-\\u459B|\\x23\\x91\\rF\\0376[|\\?-\\x94\\0113-\\\\\\s]]]]{6}?)(?<=[^\\t-\\x42H\\04\\f\\03\\0172\\?i\\u97B6\\e\\f\\uDAC2])(?=\\.*+)(?>[^\\016\\r\\{\\,\\uA29D\\034\\02[\\02-\\[|\\t\\056\\uF599\\x62\\e\\<\\032\\uF0AC\\0026\\0205Q\\|\\\\\\06\\0164[|\\057-\\u7A98&&[\\061-g|\\|\\0276\\n\\042\\011\\e\\xE8\\x64B\\04\\u6D0EDW^\\p{Lower}]]]]?)(?<=[^\\n\\\\\\t\\u8E13\\,\\0114\\u656E\\xA5\\]&&[\\03-\\026|\\uF39D\\01\\{i\\u3BC2\\u14FE]])(?<=[^|\\uAE62\\054H\\|\\}&&^\\p{Space}])(?sxx)(?<=[\\f\\006\\a\\r\\xB4]{1,5})|(?x-xd:^{5}+)()"; 490// END android-changed 491 assertNotNull(Pattern.compile(pattern)); 492 } 493 494 public void testQuantCompilePos() { 495 String[] patterns = {/* "(abc){1,3}", */"abc{2,}", "abc{5}" }; 496 for (String element : patterns) { 497 Pattern.compile(element); 498 } 499 } 500 501 public void testQuantComposition() { 502 String pattern = "(a{1,3})aab"; 503 java.util.regex.Pattern pat = java.util.regex.Pattern.compile(pattern); 504 java.util.regex.Matcher mat = pat.matcher("aaab"); 505 mat.matches(); 506 mat.start(1); 507 mat.group(1); 508 } 509 510 public void testMatches() { 511 String[][] posSeq = { 512 { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" }, 513 { "213567", "12324567", "1234567", "213213567", 514 "21312312312567", "444444567" }, 515 { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" }, 516 { "213234567", "3458", "0987654", "7689546432", "0398576", 517 "98432", "5" }, 518 { 519 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", 520 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 521 + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" }, 522 { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa", 523 "abbbAbbbliceaaa", "Alice" }, 524 { "a123", "bnxnvgds156", "for", "while", "if", "struct" }, 525 { "xy" }, { "xy" }, { "xcy" } 526 527 }; 528 529 for (int i = 0; i < testPatterns.length; i++) { 530 for (int j = 0; j < posSeq[i].length; j++) { 531 assertTrue("Incorrect match: " + testPatterns[i] + " vs " 532 + posSeq[i][j], Pattern.matches(testPatterns[i], 533 posSeq[i][j])); 534 } 535 } 536 } 537 538 public void testMatchesException() { 539 /* Negative assertion test. */ 540 for (String aPattern : wrongTestPatterns) { 541 try { 542 Pattern.matches(aPattern, "Foo"); 543 fail("PatternSyntaxException is expected"); 544 } catch (PatternSyntaxException pse) { 545 /* OKAY */ 546 } catch (Exception e) { 547 fail("Unexpected exception: " + e); 548 } 549 } 550 } 551 552 public void testTimeZoneIssue() { 553 Pattern p = Pattern.compile("GMT(\\+|\\-)(\\d+)(:(\\d+))?"); 554 Matcher m = p.matcher("GMT-9:45"); 555 assertTrue(m.matches()); 556 assertEquals("-", m.group(1)); 557 assertEquals("9", m.group(2)); 558 assertEquals(":45", m.group(3)); 559 assertEquals("45", m.group(4)); 560 } 561 562// BEGIN android-changed 563// Removed one pattern that is buggy on the JDK. We don't want to duplicate that. 564 public void testCompileRanges() { 565 String[] correctTestPatterns = { "[^]*abb]*", /* "[^a-d[^m-p]]*abb", */ 566 "[a-d\\d]*abb", "[abc]*abb", "[a-e&&[de]]*abb", "[^abc]*abb", 567 "[a-e&&[^de]]*abb", "[a-z&&[^m-p]]*abb", "[a-d[m-p]]*abb", 568 "[a-zA-Z]*abb", "[+*?]*abb", "[^+*?]*abb" }; 569 570 String[] inputSecuence = { "kkkk", /* "admpabb", */ "abcabcd124654abb", 571 "abcabccbacababb", "dededededededeedabb", "gfdhfghgdfghabb", 572 "accabacbcbaabb", "acbvfgtyabb", "adbcacdbmopabcoabb", 573 "jhfkjhaSDFGHJkdfhHNJMjkhfabb", "+*??+*abb", "sdfghjkabb" }; 574 575 Pattern pat; 576 577 for (int i = 0; i < correctTestPatterns.length; i++) { 578 assertTrue("pattern: " + correctTestPatterns[i] + " input: " 579 + inputSecuence[i], Pattern.matches(correctTestPatterns[i], 580 inputSecuence[i])); 581 582 } 583 584 String[] wrongInputSecuence = { "]", /* "admpkk", */ "abcabcd124k654abb", 585 "abwcabccbacababb", "abababdeababdeabb", "abcabcacbacbabb", 586 "acdcbecbaabb", "acbotyabb", "adbcaecdbmopabcoabb", 587 "jhfkjhaSDFGHJk;dfhHNJMjkhfabb", "+*?a?+*abb", "sdf+ghjkabb" }; 588 589 for (int i = 0; i < correctTestPatterns.length; i++) { 590 assertFalse("pattern: " + correctTestPatterns[i] + " input: " 591 + wrongInputSecuence[i], Pattern.matches( 592 correctTestPatterns[i], wrongInputSecuence[i])); 593 594 } 595 } 596 597 public void testRangesSpecialCases() { 598 String neg_patterns[] = { "[a-&&[b-c]]", "[a-\\w]", "[b-a]", "[]" }; 599 600 for (String element : neg_patterns) { 601 try { 602 Pattern.compile(element); 603 fail("PatternSyntaxException was expected: " + element); 604 } catch (PatternSyntaxException pse) { 605 } 606 } 607 608 String pos_patterns[] = { "[-]+", "----", "[a-]+", "a-a-a-a-aa--", 609 "[\\w-a]+", "123-2312--aaa-213", "[a-]]+", "-]]]]]]]]]]]]]]]" }; 610 611 for (int i = 0; i < pos_patterns.length; i++) { 612 String pat = pos_patterns[i++]; 613 String inp = pos_patterns[i]; 614 assertTrue("pattern: " + pat + " input: " + inp, Pattern.matches( 615 pat, inp)); 616 } 617 } 618 // END android-changed 619 620 public void testZeroSymbols() { 621 assertTrue(Pattern.matches("[\0]*abb", "\0\0\0\0\0\0abb")); 622 } 623 624 public void testEscapes() { 625 Pattern pat = Pattern.compile("\\Q{]()*?"); 626 Matcher mat = pat.matcher("{]()*?"); 627 628 assertTrue(mat.matches()); 629 } 630 631 public void testBug181() { 632 Pattern.compile("[\\t-\\r]"); 633 } 634 635 public void testOrphanQuantifiers() { 636 try { 637 Pattern.compile("+++++"); 638 fail("PatternSyntaxException expected"); 639 } catch (PatternSyntaxException pse) { 640 } 641 } 642 643 public void testOrphanQuantifiers2() { 644 try { 645 Pattern pat = Pattern.compile("\\d+*"); 646 fail("PatternSyntaxException expected"); 647 } catch (PatternSyntaxException pse) { 648 } 649 } 650 651 public void testBug197() { 652 Object[] vals = { ":", new Integer(2), 653 new String[] { "boo", "and:foo" }, ":", new Integer(5), 654 new String[] { "boo", "and", "foo" }, ":", new Integer(-2), 655 new String[] { "boo", "and", "foo" }, ":", new Integer(3), 656 new String[] { "boo", "and", "foo" }, ":", new Integer(1), 657 new String[] { "boo:and:foo" }, "o", new Integer(5), 658 new String[] { "b", "", ":and:f", "", "" }, "o", 659 new Integer(4), new String[] { "b", "", ":and:f", "o" }, "o", 660 new Integer(-2), new String[] { "b", "", ":and:f", "", "" }, 661 "o", new Integer(0), new String[] { "b", "", ":and:f" } }; 662 663 for (int i = 0; i < vals.length / 3;) { 664 String[] res = Pattern.compile(vals[i++].toString()).split( 665 "boo:and:foo", ((Integer) vals[i++]).intValue()); 666 String[] expectedRes = (String[]) vals[i++]; 667 668 assertEquals(expectedRes.length, res.length); 669 670 for (int j = 0; j < expectedRes.length; j++) { 671 assertEquals(expectedRes[j], res[j]); 672 } 673 } 674 } 675 676 public void testURIPatterns() { 677 String URI_REGEXP_STR = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; 678 String SCHEME_REGEXP_STR = "^[a-zA-Z]{1}[\\w+-.]+$"; 679 String REL_URI_REGEXP_STR = "^(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; 680 String IPV6_REGEXP_STR = "^[0-9a-fA-F\\:\\.]+(\\%\\w+)?$"; 681 String IPV6_REGEXP_STR2 = "^\\[[0-9a-fA-F\\:\\.]+(\\%\\w+)?\\]$"; 682 String IPV4_REGEXP_STR = "^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$"; 683 String HOSTNAME_REGEXP_STR = "\\w+[\\w\\-\\.]*"; 684 685 Pattern URI_REGEXP = Pattern.compile(URI_REGEXP_STR); 686 Pattern REL_URI_REGEXP = Pattern.compile(REL_URI_REGEXP_STR); 687 Pattern SCHEME_REGEXP = Pattern.compile(SCHEME_REGEXP_STR); 688 Pattern IPV4_REGEXP = Pattern.compile(IPV4_REGEXP_STR); 689 Pattern IPV6_REGEXP = Pattern.compile(IPV6_REGEXP_STR); 690 Pattern IPV6_REGEXP2 = Pattern.compile(IPV6_REGEXP_STR2); 691 Pattern HOSTNAME_REGEXP = Pattern.compile(HOSTNAME_REGEXP_STR); 692 } 693 694 public void testFindBoundaryCases1() { 695 Pattern pat = Pattern.compile(".*\n"); 696 Matcher mat = pat.matcher("a\n"); 697 698 mat.find(); 699 assertEquals("a\n", mat.group()); 700 } 701 702 public void testFindBoundaryCases2() { 703 Pattern pat = Pattern.compile(".*A"); 704 Matcher mat = pat.matcher("aAa"); 705 706 mat.find(); 707 assertEquals("aA", mat.group()); 708 } 709 710 public void testFindBoundaryCases3() { 711 Pattern pat = Pattern.compile(".*A"); 712 Matcher mat = pat.matcher("a\naA\n"); 713 714 mat.find(); 715 assertEquals("aA", mat.group()); 716 } 717 718 public void testFindBoundaryCases4() { 719 Pattern pat = Pattern.compile("A.*"); 720 Matcher mat = pat.matcher("A\n"); 721 722 mat.find(); 723 assertEquals("A", mat.group()); 724 } 725 726 public void testFindBoundaryCases5() { 727 Pattern pat = Pattern.compile(".*A.*"); 728 Matcher mat = pat.matcher("\nA\naaa\nA\naaAaa\naaaA\n"); 729 // Matcher mat = pat.matcher("\nA\n"); 730 String[] res = { "A", "A", "aaAaa", "aaaA" }; 731 int k = 0; 732 for (; mat.find(); k++) { 733 assertEquals(res[k], mat.group()); 734 } 735 } 736 737 public void testFindBoundaryCases6() { 738 String[] res = { "", "a", "", "" }; 739 Pattern pat = Pattern.compile(".*"); 740 Matcher mat = pat.matcher("\na\n"); 741 int k = 0; 742 743 for (; mat.find(); k++) { 744 assertEquals(res[k], mat.group()); 745 } 746 } 747 748 public void testBackReferences() { 749 Pattern pat = Pattern.compile("(\\((\\w*):(.*):(\\2)\\))"); 750 Matcher mat = pat 751 .matcher("(start1: word :start1)(start2: word :start2)"); 752 int k = 1; 753 for (; mat.find(); k++) { 754 assertEquals("start" + k, mat.group(2)); 755 assertEquals(" word ", mat.group(3)); 756 assertEquals("start" + k, mat.group(4)); 757 } 758 759 assertEquals(3, k); 760 pat = Pattern.compile(".*(.)\\1"); 761 mat = pat.matcher("saa"); 762 assertTrue(mat.matches()); 763 } 764 765 public void testNewLine() { 766 Pattern pat = Pattern.compile("(^$)*\n", Pattern.MULTILINE); 767 Matcher mat = pat.matcher("\r\n\n"); 768 int counter = 0; 769 while (mat.find()) { 770 counter++; 771 } 772 assertEquals(2, counter); 773 } 774 775 public void testFindGreedy() { 776 Pattern pat = Pattern.compile(".*aaa", Pattern.DOTALL); 777 Matcher mat = pat.matcher("aaaa\naaa\naaaaaa"); 778 mat.matches(); 779 assertEquals(15, mat.end()); 780 } 781 782 public void testSerialization() throws Exception { 783 Pattern pat = Pattern.compile("a*bc"); 784 SerializableAssert comparator = new SerializableAssert() { 785 public void assertDeserialized(Serializable initial, 786 Serializable deserialized) { 787 assertEquals(((Pattern) initial).toString(), 788 ((Pattern) deserialized).toString()); 789 } 790 }; 791 SerializationTest.verifyGolden(this, pat, comparator); 792 SerializationTest.verifySelf(pat, comparator); 793 } 794 795 public void testSOLQuant() { 796 Pattern pat = Pattern.compile("$*", Pattern.MULTILINE); 797 Matcher mat = pat.matcher("\n\n"); 798 int counter = 0; 799 while (mat.find()) { 800 counter++; 801 } 802 803 assertEquals(3, counter); 804 } 805 806 public void testIllegalEscape() { 807 try { 808 Pattern.compile("\\y"); 809 fail("PatternSyntaxException expected"); 810 } catch (PatternSyntaxException pse) { 811 } 812 } 813 814 public void testEmptyFamily() { 815 Pattern.compile("\\p{Lower}"); 816 String a = "*"; 817 } 818 819 public void testNonCaptConstr() { 820 // Flags 821 Pattern pat = Pattern.compile("(?i)b*(?-i)a*"); 822 assertTrue(pat.matcher("bBbBaaaa").matches()); 823 assertFalse(pat.matcher("bBbBAaAa").matches()); 824 825 // Non-capturing groups 826 pat = Pattern.compile("(?i:b*)a*"); 827 assertTrue(pat.matcher("bBbBaaaa").matches()); 828 assertFalse(pat.matcher("bBbBAaAa").matches()); 829 830 pat = Pattern 831 // 1 2 3 4 5 6 7 8 9 10 11 832 .compile("(?:-|(-?\\d+\\d\\d\\d))?(?:-|-(\\d\\d))?(?:-|-(\\d\\d))?(T)?(?:(\\d\\d):(\\d\\d):(\\d\\d)(\\.\\d+)?)?(?:(?:((?:\\+|\\-)\\d\\d):(\\d\\d))|(Z))?"); 833 Matcher mat = pat.matcher("-1234-21-31T41:51:61.789+71:81"); 834 assertTrue(mat.matches()); 835 assertEquals("-1234", mat.group(1)); 836 assertEquals("21", mat.group(2)); 837 assertEquals("31", mat.group(3)); 838 assertEquals("T", mat.group(4)); 839 assertEquals("41", mat.group(5)); 840 assertEquals("51", mat.group(6)); 841 assertEquals("61", mat.group(7)); 842 assertEquals(".789", mat.group(8)); 843 assertEquals("+71", mat.group(9)); 844 assertEquals("81", mat.group(10)); 845 846 // positive lookahead 847 pat = Pattern.compile(".*\\.(?=log$).*$"); 848 assertTrue(pat.matcher("a.b.c.log").matches()); 849 assertFalse(pat.matcher("a.b.c.log.").matches()); 850 851 // negative lookahead 852 pat = Pattern.compile(".*\\.(?!log$).*$"); 853 assertFalse(pat.matcher("abc.log").matches()); 854 assertTrue(pat.matcher("abc.logg").matches()); 855 856 // positive lookbehind 857 pat = Pattern.compile(".*(?<=abc)\\.log$"); 858 assertFalse(pat.matcher("cde.log").matches()); 859 assertTrue(pat.matcher("abc.log").matches()); 860 861 // negative lookbehind 862 pat = Pattern.compile(".*(?<!abc)\\.log$"); 863 assertTrue(pat.matcher("cde.log").matches()); 864 assertFalse(pat.matcher("abc.log").matches()); 865 866 // atomic group 867 pat = Pattern.compile("(?>a*)abb"); 868 assertFalse(pat.matcher("aaabb").matches()); 869 pat = Pattern.compile("(?>a*)bb"); 870 assertTrue(pat.matcher("aaabb").matches()); 871 872 pat = Pattern.compile("(?>a|aa)aabb"); 873 assertTrue(pat.matcher("aaabb").matches()); 874 pat = Pattern.compile("(?>aa|a)aabb"); 875 assertFalse(pat.matcher("aaabb").matches()); 876 877// BEGIN android-removed 878// Questionable constructs that ICU doesn't support. 879// // quantifiers over look ahead 880// pat = Pattern.compile(".*(?<=abc)*\\.log$"); 881// assertTrue(pat.matcher("cde.log").matches()); 882// pat = Pattern.compile(".*(?<=abc)+\\.log$"); 883// assertFalse(pat.matcher("cde.log").matches()); 884// END android-removed 885 886 } 887 888 public void testCorrectReplacementBackreferencedJointSet() { 889 Pattern pat = Pattern.compile("ab(a)*\\1"); 890 pat = Pattern.compile("abc(cd)fg"); 891 pat = Pattern.compile("aba*cd"); 892 pat = Pattern.compile("ab(a)*+cd"); 893 pat = Pattern.compile("ab(a)*?cd"); 894 pat = Pattern.compile("ab(a)+cd"); 895 pat = Pattern.compile(".*(.)\\1"); 896 pat = Pattern.compile("ab((a)|c|d)e"); 897 pat = Pattern.compile("abc((a(b))cd)"); 898 pat = Pattern.compile("ab(a)++cd"); 899 pat = Pattern.compile("ab(a)?(c)d"); 900 pat = Pattern.compile("ab(a)?+cd"); 901 pat = Pattern.compile("ab(a)??cd"); 902 pat = Pattern.compile("ab(a)??cd"); 903 pat = Pattern.compile("ab(a){1,3}?(c)d"); 904 } 905 906 public void testCompilePatternWithTerminatorMark() { 907 Pattern pat = Pattern.compile("a\u0000\u0000cd"); 908 Matcher mat = pat.matcher("a\u0000\u0000cd"); 909 assertTrue(mat.matches()); 910 } 911 912 public void testAlternations() { 913 String baseString = "|a|bc"; 914 Pattern pat = Pattern.compile(baseString); 915 Matcher mat = pat.matcher(""); 916 917 assertTrue(mat.matches()); 918 919 baseString = "a||bc"; 920 pat = Pattern.compile(baseString); 921 mat = pat.matcher(""); 922 assertTrue(mat.matches()); 923 924 baseString = "a|bc|"; 925 pat = Pattern.compile(baseString); 926 mat = pat.matcher(""); 927 assertTrue(mat.matches()); 928 929 baseString = "a|b|"; 930 pat = Pattern.compile(baseString); 931 mat = pat.matcher(""); 932 assertTrue(mat.matches()); 933 934 baseString = "a(|b|cd)e"; 935 pat = Pattern.compile(baseString); 936 mat = pat.matcher("ae"); 937 assertTrue(mat.matches()); 938 939 baseString = "a(b||cd)e"; 940 pat = Pattern.compile(baseString); 941 mat = pat.matcher("ae"); 942 assertTrue(mat.matches()); 943 944 baseString = "a(b|cd|)e"; 945 pat = Pattern.compile(baseString); 946 mat = pat.matcher("ae"); 947 assertTrue(mat.matches()); 948 949 baseString = "a(b|c|)e"; 950 pat = Pattern.compile(baseString); 951 mat = pat.matcher("ae"); 952 assertTrue(mat.matches()); 953 954 baseString = "a(|)e"; 955 pat = Pattern.compile(baseString); 956 mat = pat.matcher("ae"); 957 assertTrue(mat.matches()); 958 959 baseString = "|"; 960 pat = Pattern.compile(baseString); 961 mat = pat.matcher(""); 962 assertTrue(mat.matches()); 963 964 baseString = "a(?:|)e"; 965 pat = Pattern.compile(baseString); 966 mat = pat.matcher("ae"); 967 assertTrue(mat.matches()); 968 969 baseString = "a||||bc"; 970 pat = Pattern.compile(baseString); 971 mat = pat.matcher(""); 972 assertTrue(mat.matches()); 973 974 baseString = "(?i-is)|a"; 975 pat = Pattern.compile(baseString); 976 mat = pat.matcher("a"); 977 assertTrue(mat.matches()); 978 } 979 980 public void testMatchWithGroups() { 981 String baseString = "jwkerhjwehrkwjehrkwjhrwkjehrjwkehrjkwhrkwehrkwhrkwrhwkhrwkjehr"; 982 String pattern = ".*(..).*\\1.*"; 983 assertTrue(Pattern.compile(pattern).matcher(baseString).matches()); 984 985 baseString = "saa"; 986 pattern = ".*(.)\\1"; 987 assertTrue(Pattern.compile(pattern).matcher(baseString).matches()); 988 assertTrue(Pattern.compile(pattern).matcher(baseString).find()); 989 } 990 991 public void testSplitEmptyCharSequence() { 992 String s1 = ""; 993 String[] arr = s1.split(":"); 994 assertEquals(arr.length, 1); 995 } 996 997 public void testSplitEndsWithPattern() { 998 assertEquals(",,".split(",", 3).length, 3); 999 assertEquals(",,".split(",", 4).length, 3); 1000 1001 assertEquals(Pattern.compile("o").split("boo:and:foo", 5).length, 5); 1002 assertEquals(Pattern.compile("b").split("ab", -1).length, 2); 1003 } 1004 1005 public void testCaseInsensitiveFlag() { 1006 assertTrue(Pattern.matches("(?i-:AbC)", "ABC")); 1007 } 1008 1009 public void testEmptyGroups() { 1010 Pattern pat = Pattern.compile("ab(?>)cda"); 1011 Matcher mat = pat.matcher("abcda"); 1012 assertTrue(mat.matches()); 1013 1014 pat = Pattern.compile("ab()"); 1015 mat = pat.matcher("ab"); 1016 assertTrue(mat.matches()); 1017 1018 pat = Pattern.compile("abc(?:)(..)"); 1019 mat = pat.matcher("abcgf"); 1020 assertTrue(mat.matches()); 1021 } 1022 1023 public void testCompileNonCaptGroup() { 1024 boolean isCompiled = false; 1025 1026 try { 1027// BEGIN android-change 1028// We don't have canonical equivalence. 1029 Pattern pat = Pattern.compile("(?:)"); 1030 pat = Pattern.compile("(?:)", Pattern.DOTALL); 1031 pat = Pattern.compile("(?:)", Pattern.CASE_INSENSITIVE); 1032 pat = Pattern.compile("(?:)", Pattern.COMMENTS | Pattern.UNIX_LINES); 1033// END android-change 1034 isCompiled = true; 1035 } catch (PatternSyntaxException e) { 1036 System.out.println(e); 1037 } 1038 assertTrue(isCompiled); 1039 } 1040 1041 public void testEmbeddedFlags() { 1042 String baseString = "(?i)((?s)a)"; 1043 String testString = "A"; 1044 Pattern pat = Pattern.compile(baseString); 1045 Matcher mat = pat.matcher(testString); 1046 assertTrue(mat.matches()); 1047 1048 baseString = "(?x)(?i)(?s)(?d)a"; 1049 testString = "A"; 1050 pat = Pattern.compile(baseString); 1051 mat = pat.matcher(testString); 1052 assertTrue(mat.matches()); 1053 1054 baseString = "(?x)(?i)(?s)(?d)a."; 1055 testString = "a\n"; 1056 pat = Pattern.compile(baseString); 1057 mat = pat.matcher(testString); 1058 assertTrue(mat.matches()); 1059 1060 baseString = "abc(?x:(?i)(?s)(?d)a.)"; 1061 testString = "abcA\n"; 1062 pat = Pattern.compile(baseString); 1063 mat = pat.matcher(testString); 1064 assertTrue(mat.matches()); 1065 1066 baseString = "abc((?x)d)(?i)(?s)a"; 1067 testString = "abcdA"; 1068 pat = Pattern.compile(baseString); 1069 mat = pat.matcher(testString); 1070 assertTrue(mat.matches()); 1071 } 1072 1073 public void testAltWithFlags() { 1074 boolean isCompiled = false; 1075 1076 try { 1077 Pattern pat = Pattern.compile("|(?i-xi)|()"); 1078 isCompiled = true; 1079 } catch (PatternSyntaxException e) { 1080 System.out.println(e); 1081 } 1082 assertTrue(isCompiled); 1083 } 1084 1085 public void testRestoreFlagsAfterGroup() { 1086 String baseString = "abc((?x)d) a"; 1087 String testString = "abcd a"; 1088 Pattern pat = Pattern.compile(baseString); 1089 Matcher mat = pat.matcher(testString); 1090 1091 assertTrue(mat.matches()); 1092 } 1093 1094 /* 1095 * Verify if the Pattern support the following character classes: 1096 * \p{javaLowerCase} \p{javaUpperCase} \p{javaWhitespace} \p{javaMirrored} 1097 */ 1098 public void testCompileCharacterClass() { 1099 // Regression for HARMONY-606, 696 1100 Pattern pattern = Pattern.compile("\\p{javaLowerCase}"); 1101 assertNotNull(pattern); 1102 1103 pattern = Pattern.compile("\\p{javaUpperCase}"); 1104 assertNotNull(pattern); 1105 1106 pattern = Pattern.compile("\\p{javaWhitespace}"); 1107 assertNotNull(pattern); 1108 1109 pattern = Pattern.compile("\\p{javaMirrored}"); 1110 assertNotNull(pattern); 1111 1112 pattern = Pattern.compile("\\p{javaDefined}"); 1113 assertNotNull(pattern); 1114 1115 pattern = Pattern.compile("\\p{javaDigit}"); 1116 assertNotNull(pattern); 1117 1118 pattern = Pattern.compile("\\p{javaIdentifierIgnorable}"); 1119 assertNotNull(pattern); 1120 1121 pattern = Pattern.compile("\\p{javaISOControl}"); 1122 assertNotNull(pattern); 1123 1124 pattern = Pattern.compile("\\p{javaJavaIdentifierPart}"); 1125 assertNotNull(pattern); 1126 1127 pattern = Pattern.compile("\\p{javaJavaIdentifierStart}"); 1128 assertNotNull(pattern); 1129 1130 pattern = Pattern.compile("\\p{javaLetter}"); 1131 assertNotNull(pattern); 1132 1133 pattern = Pattern.compile("\\p{javaLetterOrDigit}"); 1134 assertNotNull(pattern); 1135 1136 pattern = Pattern.compile("\\p{javaSpaceChar}"); 1137 assertNotNull(pattern); 1138 1139 pattern = Pattern.compile("\\p{javaTitleCase}"); 1140 assertNotNull(pattern); 1141 1142 pattern = Pattern.compile("\\p{javaUnicodeIdentifierPart}"); 1143 assertNotNull(pattern); 1144 1145 pattern = Pattern.compile("\\p{javaUnicodeIdentifierStart}"); 1146 assertNotNull(pattern); 1147 } 1148 1149 /** 1150 * s original test was fixed to pass on RI 1151 */ 1152 1153// BEGIN android-removed 1154// We don't have canonical equivalence. 1155// public void testCanonEqFlag() { 1156// 1157// /* 1158// * for decompositions see 1159// * http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt 1160// * http://www.unicode.org/reports/tr15/#Decomposition 1161// */ 1162// String baseString; 1163// String testString; 1164// Pattern pat; 1165// Matcher mat; 1166// 1167// baseString = "ab(a*)\\1"; 1168// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1169// 1170// baseString = "a(abcdf)d"; 1171// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1172// 1173// baseString = "aabcdfd"; 1174// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1175// 1176// // \u01E0 -> \u0226\u0304 ->\u0041\u0307\u0304 1177// // \u00CC -> \u0049\u0300 1178// 1179// /* 1180// * baseString = "\u01E0\u00CCcdb(ac)"; testString = 1181// * "\u0226\u0304\u0049\u0300cdbac"; pat = Pattern.compile(baseString, 1182// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1183// * assertTrue(mat.matches()); 1184// */ 1185// baseString = "\u01E0cdb(a\u00CCc)"; 1186// testString = "\u0041\u0307\u0304cdba\u0049\u0300c"; 1187// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1188// mat = pat.matcher(testString); 1189// assertTrue(mat.matches()); 1190// 1191// baseString = "a\u00CC"; 1192// testString = "a\u0049\u0300"; 1193// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1194// mat = pat.matcher(testString); 1195// assertTrue(mat.matches()); 1196// 1197// /* 1198// * baseString = "\u0226\u0304cdb(ac\u0049\u0300)"; testString = 1199// * "\u01E0cdbac\u00CC"; pat = Pattern.compile(baseString, 1200// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1201// * assertTrue(mat.matches()); 1202// * 1203// * baseString = "cdb(?:\u0041\u0307\u0304\u00CC)"; testString = 1204// * "cdb\u0226\u0304\u0049\u0300"; pat = Pattern.compile(baseString, 1205// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1206// * assertTrue(mat.matches()); 1207// * 1208// * baseString = "\u01E0[a-c]\u0049\u0300cdb(ac)"; testString = 1209// * "\u01E0b\u00CCcdbac"; pat = Pattern.compile(baseString, 1210// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1211// * assertTrue(mat.matches()); 1212// * 1213// * baseString = "\u01E0|\u00CCcdb(ac)"; testString = 1214// * "\u0041\u0307\u0304"; pat = Pattern.compile(baseString, 1215// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1216// * assertTrue(mat.matches()); 1217// * 1218// * baseString = "\u00CC?cdb(ac)*(\u01E0)*[a-c]"; testString = 1219// * "cdb\u0041\u0307\u0304b"; pat = Pattern.compile(baseString, 1220// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1221// * assertTrue(mat.matches()); 1222// */ 1223// baseString = "a\u0300"; 1224// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1225// mat = pat.matcher("a\u00E0a"); 1226// assertTrue(mat.find()); 1227// 1228// /* 1229// * baseString = "\u7B20\uF9F8abc"; pat = Pattern.compile(baseString, 1230// * Pattern.CANON_EQ); mat = pat.matcher("\uF9F8\uF9F8abc"); 1231// * assertTrue(mat.matches()); 1232// * 1233// * //\u01F9 -> \u006E\u0300 //\u00C3 -> \u0041\u0303 1234// * 1235// * baseString = "cdb(?:\u00C3\u006E\u0300)"; testString = 1236// * "cdb\u0041\u0303\u01F9"; pat = Pattern.compile(baseString, 1237// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1238// * assertTrue(mat.matches()); 1239// * 1240// * //\u014C -> \u004F\u0304 //\u0163 -> \u0074\u0327 1241// * 1242// * baseString = "cdb(?:\u0163\u004F\u0304)"; testString = 1243// * "cdb\u0074\u0327\u014C"; pat = Pattern.compile(baseString, 1244// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1245// * assertTrue(mat.matches()); 1246// */ 1247// // \u00E1->a\u0301 1248// // canonical ordering takes place \u0301\u0327 -> \u0327\u0301 1249// baseString = "c\u0327\u0301"; 1250// testString = "c\u0301\u0327"; 1251// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1252// mat = pat.matcher(testString); 1253// assertTrue(mat.matches()); 1254// 1255// /* 1256// * Hangul decompositions 1257// */ 1258// // \uD4DB->\u1111\u1171\u11B6 1259// // \uD21E->\u1110\u116D\u11B5 1260// // \uD264->\u1110\u1170 1261// // not Hangul:\u0453->\u0433\u0301 1262// baseString = "a\uD4DB\u1111\u1171\u11B6\uD264"; 1263// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1264// 1265// baseString = "\u0453c\uD4DB"; 1266// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1267// 1268// baseString = "a\u1110\u116D\u11B5b\uD21Ebc"; 1269// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1270// 1271// /* 1272// * baseString = "\uD4DB\uD21E\u1110\u1170cdb(ac)"; testString = 1273// * "\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac"; pat = 1274// * Pattern.compile(baseString, Pattern.CANON_EQ); mat = 1275// * pat.matcher(testString); assertTrue(mat.matches()); 1276// */ 1277// baseString = "\uD4DB\uD264cdb(a\uD21Ec)"; 1278// testString = "\u1111\u1171\u11B6\u1110\u1170cdba\u1110\u116D\u11B5c"; 1279// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1280// mat = pat.matcher(testString); 1281// assertTrue(mat.matches()); 1282// 1283// baseString = "a\uD4DB"; 1284// testString = "a\u1111\u1171\u11B6"; 1285// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1286// mat = pat.matcher(testString); 1287// assertTrue(mat.matches()); 1288// 1289// baseString = "a\uD21E"; 1290// testString = "a\u1110\u116D\u11B5"; 1291// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1292// mat = pat.matcher(testString); 1293// assertTrue(mat.matches()); 1294// 1295// /* 1296// * baseString = "\u1111\u1171\u11B6cdb(ac\u1110\u116D\u11B5)"; 1297// * testString = "\uD4DBcdbac\uD21E"; pat = Pattern.compile(baseString, 1298// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1299// * assertTrue(mat.matches()); 1300// * 1301// * baseString = "cdb(?:\u1111\u1171\u11B6\uD21E)"; testString = 1302// * "cdb\uD4DB\u1110\u116D\u11B5"; pat = Pattern.compile(baseString, 1303// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1304// * assertTrue(mat.matches()); 1305// * 1306// * baseString = "\uD4DB[a-c]\u1110\u116D\u11B5cdb(ac)"; testString = 1307// * "\uD4DBb\uD21Ecdbac"; pat = Pattern.compile(baseString, 1308// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1309// * assertTrue(mat.matches()); 1310// */ 1311// baseString = "\uD4DB|\u00CCcdb(ac)"; 1312// testString = "\u1111\u1171\u11B6"; 1313// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1314// mat = pat.matcher(testString); 1315// assertTrue(mat.matches()); 1316// 1317// baseString = "\uD4DB|\u00CCcdb(ac)"; 1318// testString = "\u1111\u1171"; 1319// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1320// mat = pat.matcher(testString); 1321// assertFalse(mat.matches()); 1322// 1323// baseString = "\u00CC?cdb(ac)*(\uD4DB)*[a-c]"; 1324// testString = "cdb\u1111\u1171\u11B6b"; 1325// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1326// mat = pat.matcher(testString); 1327// assertTrue(mat.matches()); 1328// 1329// baseString = "\uD4DB"; 1330// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1331// mat = pat.matcher("a\u1111\u1171\u11B6a"); 1332// assertTrue(mat.find()); 1333// 1334// baseString = "\u1111"; 1335// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1336// mat = pat.matcher("bcda\uD4DBr"); 1337// assertFalse(mat.find()); 1338// } 1339// 1340// /** 1341// * s original test was fixed to pass on RI 1342// */ 1343// 1344// public void testIndexesCanonicalEq() { 1345// String baseString; 1346// String testString; 1347// Pattern pat; 1348// Matcher mat; 1349// 1350// baseString = "\uD4DB"; 1351// pat = Pattern.compile(baseString, Pattern.CANON_EQ); 1352// mat = pat.matcher("bcda\u1111\u1171\u11B6awr"); 1353// assertTrue(mat.find()); 1354// assertEquals(mat.start(), 4); 1355// assertEquals(mat.end(), 7); 1356// 1357// /* 1358// * baseString = "\uD4DB\u1111\u1171\u11B6"; pat = 1359// * Pattern.compile(baseString, Pattern.CANON_EQ); mat = 1360// * pat.matcher("bcda\u1111\u1171\u11B6\uD4DBawr"); 1361// * assertTrue(mat.find()); assertEquals(mat.start(), 4); 1362// * assertEquals(mat.end(), 8); 1363// * 1364// * baseString = "\uD4DB\uD21E\u1110\u1170"; testString = 1365// * "abcabc\u1111\u1171\u11B6\u1110\u116D\u11B5\uD264cdbac"; pat = 1366// * Pattern.compile(baseString, Pattern.CANON_EQ); mat = 1367// * pat.matcher(testString); assertTrue(mat.find()); 1368// * assertEquals(mat.start(), 6); assertEquals(mat.end(), 13); 1369// */} 1370// 1371// /** 1372// * s original test was fixed to pass on RI 1373// */ 1374// 1375// public void testCanonEqFlagWithSupplementaryCharacters() { 1376// 1377// /* 1378// * \u1D1BF->\u1D1BB\u1D16F->\u1D1B9\u1D165\u1D16F in UTF32 1379// * \uD834\uDDBF->\uD834\uDDBB\uD834\uDD6F 1380// * ->\uD834\uDDB9\uD834\uDD65\uD834\uDD6F in UTF16 1381// */ 1382// String patString = "abc\uD834\uDDBFef"; 1383// String testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; 1384// Pattern pat = Pattern.compile(patString, Pattern.CANON_EQ); 1385// Matcher mat = pat.matcher(testString); 1386// assertTrue(mat.matches()); 1387// /* 1388// * testString = "abc\uD834\uDDBB\uD834\uDD6Fef"; mat = 1389// * pat.matcher(testString); assertTrue(mat.matches()); 1390// * 1391// * patString = "abc\uD834\uDDBB\uD834\uDD6Fef"; testString = 1392// * "abc\uD834\uDDBFef"; pat = Pattern.compile(patString, 1393// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1394// * assertTrue(mat.matches()); 1395// */ 1396// testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; 1397// mat = pat.matcher(testString); 1398// assertTrue(mat.matches()); 1399// /* 1400// * patString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef"; testString = 1401// * "abc\uD834\uDDBFef"; pat = Pattern.compile(patString, 1402// * Pattern.CANON_EQ); mat = pat.matcher(testString); 1403// * assertTrue(mat.matches()); 1404// * 1405// * testString = "abc\uD834\uDDBB\uD834\uDD6Fef"; mat = 1406// * pat.matcher(testString); assertTrue(mat.matches()); 1407// */ 1408// /* 1409// * testSupplementary characters with no decomposition 1410// */ 1411// /* 1412// * patString = "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef"; 1413// * testString = "a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef"; pat = 1414// * Pattern.compile(patString, Pattern.CANON_EQ); mat = 1415// * pat.matcher(testString); assertTrue(mat.matches()); 1416// */} 1417// END android-removed 1418 1419 public void testRangesWithSurrogatesSupplementary() { 1420 String patString = "[abc\uD8D2]"; 1421 String testString = "\uD8D2"; 1422 Pattern pat = Pattern.compile(patString); 1423 Matcher mat = pat.matcher(testString); 1424 assertTrue(mat.matches()); 1425 1426 testString = "a"; 1427 mat = pat.matcher(testString); 1428 assertTrue(mat.matches()); 1429 1430 testString = "ef\uD8D2\uDD71gh"; 1431 mat = pat.matcher(testString); 1432 assertFalse(mat.find()); 1433 1434 testString = "ef\uD8D2gh"; 1435 mat = pat.matcher(testString); 1436 assertTrue(mat.find()); 1437 1438 patString = "[abc\uD8D3&&[c\uD8D3]]"; 1439 testString = "c"; 1440 pat = Pattern.compile(patString); 1441 mat = pat.matcher(testString); 1442 assertTrue(mat.matches()); 1443 1444 testString = "a"; 1445 mat = pat.matcher(testString); 1446 assertFalse(mat.matches()); 1447 1448 testString = "ef\uD8D3\uDD71gh"; 1449 mat = pat.matcher(testString); 1450 assertFalse(mat.find()); 1451 1452 testString = "ef\uD8D3gh"; 1453 mat = pat.matcher(testString); 1454 assertTrue(mat.find()); 1455 1456 patString = "[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]"; 1457 testString = "c"; 1458 pat = Pattern.compile(patString); 1459 mat = pat.matcher(testString); 1460 assertTrue(mat.matches()); 1461 1462 testString = "\uDBEE\uDF0C"; 1463 mat = pat.matcher(testString); 1464 assertTrue(mat.matches()); 1465 1466 testString = "ef\uD8D3\uDD71gh"; 1467 mat = pat.matcher(testString); 1468 assertFalse(mat.find()); 1469 1470 testString = "ef\uD8D3gh"; 1471 mat = pat.matcher(testString); 1472 assertTrue(mat.find()); 1473 1474 patString = "[abc\uDBFC]\uDDC2cd"; 1475 testString = "\uDBFC\uDDC2cd"; 1476 pat = Pattern.compile(patString); 1477 mat = pat.matcher(testString); 1478 assertFalse(mat.matches()); 1479 1480 testString = "a\uDDC2cd"; 1481 mat = pat.matcher(testString); 1482 assertTrue(mat.matches()); 1483 } 1484 1485 public void testSequencesWithSurrogatesSupplementary() { 1486 String patString = "abcd\uD8D3"; 1487 String testString = "abcd\uD8D3\uDFFC"; 1488 Pattern pat = Pattern.compile(patString); 1489 Matcher mat = pat.matcher(testString); 1490// BEGIN android-changed 1491// This one really doesn't make sense, as the above is a corrupt surrogate. 1492// Even if it's matched by the JDK, it's more of a bug than of a behavior one 1493// might want to duplicate. 1494// assertFalse(mat.find()); 1495// END android-changed 1496 testString = "abcd\uD8D3abc"; 1497 mat = pat.matcher(testString); 1498 assertTrue(mat.find()); 1499 1500 patString = "ab\uDBEFcd"; 1501 testString = "ab\uDBEFcd"; 1502 pat = Pattern.compile(patString); 1503 mat = pat.matcher(testString); 1504 assertTrue(mat.matches()); 1505 1506 patString = "\uDFFCabcd"; 1507 testString = "\uD8D3\uDFFCabcd"; 1508 pat = Pattern.compile(patString); 1509 mat = pat.matcher(testString); 1510 assertFalse(mat.find()); 1511 1512 testString = "abc\uDFFCabcdecd"; 1513 mat = pat.matcher(testString); 1514 assertTrue(mat.find()); 1515 1516 patString = "\uD8D3\uDFFCabcd"; 1517 testString = "abc\uD8D3\uD8D3\uDFFCabcd"; 1518 pat = Pattern.compile(patString); 1519 mat = pat.matcher(testString); 1520 assertTrue(mat.find()); 1521 } 1522 1523 public void testPredefinedClassesWithSurrogatesSupplementary() { 1524 String patString = "[123\\D]"; 1525 String testString = "a"; 1526 Pattern pat = Pattern.compile(patString); 1527 Matcher mat = pat.matcher(testString); 1528 assertTrue(mat.find()); 1529 1530 testString = "5"; 1531 mat = pat.matcher(testString); 1532 assertFalse(mat.find()); 1533 1534 testString = "3"; 1535 mat = pat.matcher(testString); 1536 assertTrue(mat.find()); 1537 1538 // low surrogate 1539 testString = "\uDFC4"; 1540 mat = pat.matcher(testString); 1541 assertTrue(mat.find()); 1542 1543 // high surrogate 1544 testString = "\uDADA"; 1545 mat = pat.matcher(testString); 1546 assertTrue(mat.find()); 1547 1548 testString = "\uDADA\uDFC4"; 1549 mat = pat.matcher(testString); 1550 assertTrue(mat.find()); 1551 1552 patString = "[123[^\\p{javaDigit}]]"; 1553 testString = "a"; 1554 pat = Pattern.compile(patString); 1555 mat = pat.matcher(testString); 1556 assertTrue(mat.find()); 1557 1558 testString = "5"; 1559 mat = pat.matcher(testString); 1560 assertFalse(mat.find()); 1561 1562 testString = "3"; 1563 mat = pat.matcher(testString); 1564 assertTrue(mat.find()); 1565 1566 // low surrogate 1567 testString = "\uDFC4"; 1568 mat = pat.matcher(testString); 1569 assertTrue(mat.find()); 1570 1571 // high surrogate 1572 testString = "\uDADA"; 1573 mat = pat.matcher(testString); 1574 assertTrue(mat.find()); 1575 1576 testString = "\uDADA\uDFC4"; 1577 mat = pat.matcher(testString); 1578 assertTrue(mat.find()); 1579 1580 // surrogate characters 1581 patString = "\\p{Cs}"; 1582 testString = "\uD916\uDE27"; 1583 pat = Pattern.compile(patString); 1584 mat = pat.matcher(testString); 1585 1586 /* 1587 * see http://www.unicode.org/reports/tr18/#Supplementary_Characters we 1588 * have to treat text as code points not code units. \\p{Cs} matches any 1589 * surrogate character but here testString is a one code point 1590 * consisting of two code units (two surrogate characters) so we find 1591 * nothing 1592 */ 1593 // assertFalse(mat.find()); 1594 // swap low and high surrogates 1595 testString = "\uDE27\uD916"; 1596 mat = pat.matcher(testString); 1597 assertTrue(mat.find()); 1598 1599 patString = "[\uD916\uDE271\uD91623&&[^\\p{Cs}]]"; 1600 testString = "1"; 1601 pat = Pattern.compile(patString); 1602 mat = pat.matcher(testString); 1603 assertTrue(mat.find()); 1604 1605 testString = "\uD916"; 1606 pat = Pattern.compile(patString); 1607 mat = pat.matcher(testString); 1608 assertFalse(mat.find()); 1609 1610 testString = "\uD916\uDE27"; 1611 pat = Pattern.compile(patString); 1612 mat = pat.matcher(testString); 1613 assertTrue(mat.find()); 1614 1615 // \uD9A0\uDE8E=\u7828E 1616 // \u78281=\uD9A0\uDE81 1617 patString = "[a-\uD9A0\uDE8E]"; 1618 testString = "\uD9A0\uDE81"; 1619 pat = Pattern.compile(patString); 1620 mat = pat.matcher(testString); 1621 assertTrue(mat.matches()); 1622 } 1623 1624 public void testDotConstructionWithSurrogatesSupplementary() { 1625 String patString = "."; 1626 String testString = "\uD9A0\uDE81"; 1627 Pattern pat = Pattern.compile(patString); 1628 Matcher mat = pat.matcher(testString); 1629 assertTrue(mat.matches()); 1630 1631 testString = "\uDE81"; 1632 mat = pat.matcher(testString); 1633 assertTrue(mat.matches()); 1634 1635 testString = "\uD9A0"; 1636 mat = pat.matcher(testString); 1637 assertTrue(mat.matches()); 1638 1639 testString = "\n"; 1640 mat = pat.matcher(testString); 1641 assertFalse(mat.matches()); 1642 1643 patString = ".*\uDE81"; 1644 testString = "\uD9A0\uDE81\uD9A0\uDE81\uD9A0\uDE81"; 1645 pat = Pattern.compile(patString); 1646 mat = pat.matcher(testString); 1647 assertFalse(mat.matches()); 1648 1649 testString = "\uD9A0\uDE81\uD9A0\uDE81\uDE81"; 1650 mat = pat.matcher(testString); 1651 assertTrue(mat.matches()); 1652 1653 patString = ".*"; 1654 testString = "\uD9A0\uDE81\n\uD9A0\uDE81\uD9A0\n\uDE81"; 1655 pat = Pattern.compile(patString, Pattern.DOTALL); 1656 mat = pat.matcher(testString); 1657 assertTrue(mat.matches()); 1658 } 1659 1660 public void test_quoteLjava_lang_String() { 1661 for (String aPattern : testPatterns) { 1662 Pattern p = Pattern.compile(aPattern); 1663 try { 1664 assertEquals("quote was wrong for plain text", "\\Qtest\\E", p 1665 .quote("test")); 1666 assertEquals("quote was wrong for text with quote sign", 1667 "\\Q\\Qtest\\E", p.quote("\\Qtest")); 1668 assertEquals("quote was wrong for quotted text", 1669 "\\Q\\Qtest\\E\\\\E\\Q\\E", p.quote("\\Qtest\\E")); 1670 } catch (Exception e) { 1671 fail("Unexpected exception: " + e); 1672 } 1673 } 1674 } 1675 1676 public void test_matcherLjava_lang_StringLjava_lang_CharSequence() { 1677 String[][] posSeq = { 1678 { "abb", "ababb", "abababbababb", "abababbababbabababbbbbabb" }, 1679 { "213567", "12324567", "1234567", "213213567", 1680 "21312312312567", "444444567" }, 1681 { "abcdaab", "aab", "abaab", "cdaab", "acbdadcbaab" }, 1682 { "213234567", "3458", "0987654", "7689546432", "0398576", 1683 "98432", "5" }, 1684 { 1685 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", 1686 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 1687 + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" }, 1688 { "ababbaAabababblice", "ababbaAliceababab", "ababbAabliceaaa", 1689 "abbbAbbbliceaaa", "Alice" }, 1690 { "a123", "bnxnvgds156", "for", "while", "if", "struct" }, 1691 { "xy" }, { "xy" }, { "xcy" } 1692 1693 }; 1694 1695 for (int i = 0; i < testPatterns.length; i++) { 1696 for (int j = 0; j < posSeq[i].length; j++) { 1697 assertTrue("Incorrect match: " + testPatterns[i] + " vs " 1698 + posSeq[i][j], Pattern.compile(testPatterns[i]) 1699 .matcher(posSeq[i][j]).matches()); 1700 } 1701 } 1702 } 1703 1704 public void testQuantifiersWithSurrogatesSupplementary() { 1705 String patString = "\uD9A0\uDE81*abc"; 1706 String testString = "\uD9A0\uDE81\uD9A0\uDE81abc"; 1707 Pattern pat = Pattern.compile(patString); 1708 Matcher mat = pat.matcher(testString); 1709 assertTrue(mat.matches()); 1710 1711 testString = "abc"; 1712 mat = pat.matcher(testString); 1713 assertTrue(mat.matches()); 1714 } 1715 1716 public void testAlternationsWithSurrogatesSupplementary() { 1717 String patString = "\uDE81|\uD9A0\uDE81|\uD9A0"; 1718 String testString = "\uD9A0"; 1719 Pattern pat = Pattern.compile(patString); 1720 Matcher mat = pat.matcher(testString); 1721 assertTrue(mat.matches()); 1722 1723 testString = "\uDE81"; 1724 mat = pat.matcher(testString); 1725 assertTrue(mat.matches()); 1726 1727 testString = "\uD9A0\uDE81"; 1728 mat = pat.matcher(testString); 1729 assertTrue(mat.matches()); 1730 1731 testString = "\uDE81\uD9A0"; 1732 mat = pat.matcher(testString); 1733 assertFalse(mat.matches()); 1734 } 1735 1736 public void testGroupsWithSurrogatesSupplementary() { 1737 1738 //this pattern matches nothing 1739 String patString = "(\uD9A0)\uDE81"; 1740 String testString = "\uD9A0\uDE81"; 1741 Pattern pat = Pattern.compile(patString); 1742 Matcher mat = pat.matcher(testString); 1743 assertFalse(mat.matches()); 1744 1745 patString = "(\uD9A0)"; 1746 testString = "\uD9A0\uDE81"; 1747 pat = Pattern.compile(patString, Pattern.DOTALL); 1748 mat = pat.matcher(testString); 1749 assertFalse(mat.find()); 1750 } 1751 1752 /* 1753 * Regression test for HARMONY-688 1754 */ 1755 public void testUnicodeCategoryWithSurrogatesSupplementary() { 1756 Pattern p = Pattern.compile("\\p{javaLowerCase}"); 1757 Matcher matcher = p.matcher("\uD801\uDC28"); 1758 assertTrue(matcher.find()); 1759 } 1760 1761 public void testSplitEmpty() { 1762 1763 Pattern pat = Pattern.compile(""); 1764 String[] s = pat.split("", -1); 1765 1766 assertEquals(1, s.length); 1767 assertEquals("", s[0]); 1768 } 1769 1770 public void testToString() { 1771 for (int i = 0; i < testPatterns.length; i++) { 1772 Pattern p = Pattern.compile(testPatterns[i]); 1773 assertEquals(testPatterns[i], p.toString()); 1774 } 1775 } 1776 1777} 1778