/* GENERATED SOURCE. DO NOT MODIFY. */
/*
 *******************************************************************************
 * Copyright (C) 1996-2014, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package android.icu.dev.test.rbbi;

import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import android.icu.dev.test.TestFmwk;
import android.icu.text.BreakIterator;
import android.icu.text.FilteredBreakIteratorBuilder;
import android.icu.util.ULocale;
import org.junit.runner.RunWith;
import android.icu.junit.IcuTestFmwkRunner;

/**
 * Tests for {@link BreakIterator}: character, word, line, sentence and title
 * iteration, plus {@link FilteredBreakIteratorBuilder}.
 *
 * <p>Most tests build a list of expected fragments and hand it to
 * {@link #generalIteratorTest(BreakIterator, List)}, which concatenates the
 * fragments, iterates over the result in every supported way and checks that
 * the boundaries found are exactly the fragment boundaries.
 */
@RunWith(IcuTestFmwkRunner.class)
public class BreakIteratorTest extends TestFmwk
{
    private BreakIterator characterBreak;
    private BreakIterator wordBreak;
    private BreakIterator lineBreak;
    private BreakIterator sentenceBreak;
    private BreakIterator titleBreak;

    public static void main(String[] args) throws Exception {
        new BreakIteratorTest().run(args);
    }

    public BreakIteratorTest()
    {

    }

    /**
     * Creates the default-locale iterators shared by the tests below.
     */
    protected void init(){
        characterBreak = BreakIterator.getCharacterInstance();
        wordBreak = BreakIterator.getWordInstance();
        lineBreak = BreakIterator.getLineInstance();
        sentenceBreak = BreakIterator.getSentenceInstance();
        titleBreak = BreakIterator.getTitleInstance();
    }

    //=========================================================================
    // general test subroutines
    //=========================================================================

    /**
     * Runs the full battery of iteration checks against one iterator.
     *
     * @param bi             the iterator under test; its text is replaced
     * @param expectedResult the fragments the iterator is expected to produce;
     *                       the test text is their concatenation
     */
    private void generalIteratorTest(BreakIterator bi, List<String> expectedResult) {
        StringBuilder buffer = new StringBuilder();
        for (int i = 0; i < expectedResult.size(); i++) {
            buffer.append(expectedResult.get(i));
        }
        String text = buffer.toString();

        bi.setText(text);

        List<String> nextResults = _testFirstAndNext(bi, text);
        List<String> previousResults = _testLastAndPrevious(bi, text);

        logln("comparing forward and backward...");
        int errs = getErrorCount();
        compareFragmentLists("forward iteration", "backward iteration", nextResults,
                        previousResults);
        // Only compare against the expected data when forward and backward
        // iteration agreed; otherwise the second report would just be noise.
        if (getErrorCount() == errs) {
            logln("comparing expected and actual...");
            compareFragmentLists("expected result", "actual result", expectedResult,
                            nextResults);
        }

        // Layout: [DONE, 0, end of fragment 0, ..., end of last fragment, DONE].
        // The DONE sentinels at both ends let the helpers index one slot past
        // the real boundaries without special cases.
        int[] boundaries = new int[expectedResult.size() + 3];
        boundaries[0] = BreakIterator.DONE;
        boundaries[1] = 0;
        for (int i = 0; i < expectedResult.size(); i++) {
            boundaries[i + 2] = boundaries[i + 1] + expectedResult.get(i).length();
        }
        boundaries[boundaries.length - 1] = BreakIterator.DONE;

        _testFollowing(bi, text, boundaries);
        _testPreceding(bi, text, boundaries);
        _testIsBoundary(bi, text, boundaries);

        doMultipleSelectionTest(bi, text);
    }

    /**
     * Walks the text forward with first()/next() and collects the fragments
     * between successive boundaries.
     *
     * @return the fragments in text order
     */
    private List<String> _testFirstAndNext(BreakIterator bi, String text) {
        int p = bi.first();
        int lastP = p;
        List<String> result = new ArrayList<String>();

        if (p != 0) {
            errln("first() returned " + p + " instead of 0");
        }
        while (p != BreakIterator.DONE) {
            p = bi.next();
            if (p != BreakIterator.DONE) {
                if (p <= lastP) {
                    errln("next() failed to move forward: next() on position "
                                    + lastP + " yielded " + p);
                }

                result.add(text.substring(lastP, p));
            }
            else {
                if (lastP != text.length()) {
                    errln("next() returned DONE prematurely: offset was "
                                    + lastP + " instead of " + text.length());
                }
            }
            lastP = p;
        }
        return result;
    }

    /**
     * Walks the text backward with last()/previous() and collects the
     * fragments between successive boundaries.
     *
     * @return the fragments in text order (each one is inserted at the front)
     */
    private List<String> _testLastAndPrevious(BreakIterator bi, String text) {
        int p = bi.last();
        int lastP = p;
        List<String> result = new ArrayList<String>();

        if (p != text.length()) {
            errln("last() returned " + p + " instead of " + text.length());
        }
        while (p != BreakIterator.DONE) {
            p = bi.previous();
            if (p != BreakIterator.DONE) {
                if (p >= lastP) {
                    errln("previous() failed to move backward: previous() on position "
                                    + lastP + " yielded " + p);
                }

                result.add(0, text.substring(p, lastP));
            }
            else {
                if (lastP != 0) {
                    errln("previous() returned DONE prematurely: offset was "
                                    + lastP + " instead of 0");
                }
            }
            lastP = p;
        }
        return result;
    }

    /**
     * Compares two fragment lists element by element. On a mismatch it logs
     * the differing fragments from both lists, reports one error, and carries
     * on from the point where the running text offsets line up again.
     *
     * @param f1Name label used for {@code f1} in the log
     * @param f2Name label used for {@code f2} in the log
     * @param f1     first fragment list
     * @param f2     second fragment list
     */
    private void compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2) {
        int p1 = 0;
        int p2 = 0;
        String s1;
        String s2;
        int t1 = 0;
        int t2 = 0;

        while (p1 < f1.size() && p2 < f2.size()) {
            s1 = f1.get(p1);
            s2 = f2.get(p2);
            t1 += s1.length();
            t2 += s2.length();

            if (s1.equals(s2)) {
                debugLogln(" >" + s1 + "<");
                ++p1;
                ++p2;
            }
            else {
                int tempT1 = t1;
                int tempT2 = t2;
                int tempP1 = p1;
                int tempP2 = p2;

                // Advance whichever side is behind until the accumulated
                // lengths match or one list runs out.
                while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {
                    while (tempT1 < tempT2 && tempP1 < f1.size()) {
                        tempT1 += (f1.get(tempP1)).length();
                        ++tempP1;
                    }
                    while (tempT2 < tempT1 && tempP2 < f2.size()) {
                        tempT2 += (f2.get(tempP2)).length();
                        ++tempP2;
                    }
                }
                logln("*** " + f1Name + " has:");
                while (p1 <= tempP1 && p1 < f1.size()) {
                    s1 = f1.get(p1);
                    t1 += s1.length();
                    debugLogln(" *** >" + s1 + "<");
                    ++p1;
                }
                logln("***** " + f2Name + " has:");
                while (p2 <= tempP2 && p2 < f2.size()) {
                    s2 = f2.get(p2);
                    t2 += s2.length();
                    debugLogln(" ***** >" + s2 + "<");
                    ++p2;
                }
                errln("Discrepancy between " + f1Name + " and " + f2Name);
            }
        }
    }

    /**
     * Checks following(i) for every offset i in the text.
     *
     * @param boundaries expected boundaries framed by DONE sentinels, as built
     *                   by {@link #generalIteratorTest(BreakIterator, List)}
     */
    private void _testFollowing(BreakIterator bi, String text, int[] boundaries) {
        logln("testFollowing():");
        int p = 2;
        for (int i = 0; i <= text.length(); i++) {
            // Once i reaches the current expected boundary, the next one
            // becomes the expected answer for following(i).
            if (i == boundaries[p]) {
                ++p;
            }

            int b = bi.following(i);
            logln("bi.following(" + i + ") -> " + b);
            if (b != boundaries[p]) {
                errln("Wrong result from following() for " + i + ": expected " + boundaries[p]
                                + ", got " + b);
            }
        }
    }

    /**
     * Checks preceding(i) for every offset i in the text.
     *
     * @param boundaries expected boundaries framed by DONE sentinels, as built
     *                   by {@link #generalIteratorTest(BreakIterator, List)}
     */
    private void _testPreceding(BreakIterator bi, String text, int[] boundaries) {
        logln("testPreceding():");
        int p = 0;
        for (int i = 0; i <= text.length(); i++) {
            int b = bi.preceding(i);
            logln("bi.preceding(" + i + ") -> " + b);
            if (b != boundaries[p]) {
                errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]
                                + ", got " + b);
            }

            // After passing a boundary, that boundary is the expected answer
            // for the offsets that follow it.
            if (i == boundaries[p + 1]) {
                ++p;
            }
        }
    }

    /**
     * Checks isBoundary(i) for every offset i in the text.
     *
     * @param boundaries expected boundaries framed by DONE sentinels, as built
     *                   by {@link #generalIteratorTest(BreakIterator, List)}
     */
    private void _testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
        logln("testIsBoundary():");
        int p = 1;
        boolean isB;
        for (int i = 0; i <= text.length(); i++) {
            isB = bi.isBoundary(i);
            logln("bi.isBoundary(" + i + ") -> " + isB);

            if (i == boundaries[p]) {
                if (!isB) {
                    errln("Wrong result from isBoundary() for " + i + ": expected true, got false");
                }
                ++p;
            }
            else {
                if (isB) {
                    errln("Wrong result from isBoundary() for " + i + ": expected false, got true");
                }
            }
        }
    }

    /**
     * Checks that next(n) on a clone agrees with n single steps on the
     * original iterator: forward from first() using next(), then backward
     * from last() using previous() and negative n.
     *
     * @param iterator the iterator to step one boundary at a time
     * @param testText the text being iterated (not read by this method)
     */
    private void doMultipleSelectionTest(BreakIterator iterator, String testText)
    {
        logln("Multiple selection test...");
        BreakIterator testIterator = (BreakIterator)iterator.clone();
        int offset = iterator.first();
        int testOffset;
        int count = 0;

        do {
            testIterator.first();
            testOffset = testIterator.next(count);
            logln("next(" + count + ") -> " + testOffset);
            if (offset != testOffset) {
                errln("next(n) and next() not returning consistent results: for step " + count
                                + ", next(n) returned " + testOffset + " and next() had " + offset);
            }

            if (offset != BreakIterator.DONE) {
                count++;
                offset = iterator.next();
            }
        } while (offset != BreakIterator.DONE);

        // now do it backwards...
        offset = iterator.last();
        count = 0;

        do {
            testIterator.last();
            testOffset = testIterator.next(count);
            logln("next(" + count + ") -> " + testOffset);
            if (offset != testOffset) {
                // The reference value in this pass comes from previous().
                errln("next(n) and previous() not returning consistent results: for step " + count
                                + ", next(n) returned " + testOffset + " and previous() had " + offset);
            }

            if (offset != BreakIterator.DONE) {
                count--;
                offset = iterator.previous();
            }
        } while (offset != BreakIterator.DONE);
    }


    /**
     * Checks two invariants over every ordered pair of characters drawn from
     * {@code testChars}: no break between CR and LF, and no break before a
     * non-spacing or enclosing mark unless a line terminator precedes it.
     * Returns early after 75 failures to keep the log manageable.
     *
     * @param tb        the iterator under test; its text is replaced
     * @param testChars the characters to combine
     */
    private void doOtherInvariantTest(BreakIterator tb, String testChars)
    {
        StringBuilder work = new StringBuilder("a\r\na");
        int errorCount = 0;

        // a break should never occur between CR and LF
        for (int i = 0; i < testChars.length(); i++) {
            work.setCharAt(0, testChars.charAt(i));
            for (int j = 0; j < testChars.length(); j++) {
                work.setCharAt(3, testChars.charAt(j));
                tb.setText(work.toString());
                for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next()) {
                    if (k == 2) {
                        errln("Break between CR and LF in string U+" + Integer.toHexString(
                                        (int)(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
                                        (int)(work.charAt(3))));
                        errorCount++;
                        if (errorCount >= 75) {
                            return;
                        }
                    }
                }
            }
        }

        // a break should never occur before a non-spacing mark, unless it's preceded
        // by a line terminator
        work.setLength(0);
        work.append("aaaa");
        for (int i = 0; i < testChars.length(); i++) {
            char c = testChars.charAt(i);
            if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003') {
                continue;
            }
            work.setCharAt(1, c);
            for (int j = 0; j < testChars.length(); j++) {
                c = testChars.charAt(j);
                if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c)
                                != Character.ENCLOSING_MARK) {
                    continue;
                }
                work.setCharAt(2, c);
                tb.setText(work.toString());
                for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next()) {
                    if (k == 2) {
                        errln("Break between U+" + Integer.toHexString((int)(work.charAt(1)))
                                        + " and U+" + Integer.toHexString((int)(work.charAt(2))));
                        errorCount++;
                        if (errorCount >= 75) {
                            return;
                        }
                    }
                }
            }
        }
    }

    /**
     * Logs {@code s} with every character outside printable ASCII (space up
     * to but excluding DEL) replaced by a backslash-u escape with four hex
     * digits.
     */
    public void debugLogln(String s) {
        final String zeros = "0000";
        String temp;
        StringBuilder out = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c >= ' ' && c < '\u007f') {
                out.append(c);
            }
            else {
                out.append("\\u");
                temp = Integer.toHexString((int)c);
                // Left-pad the hex value to four digits.
                out.append(zeros.substring(0, 4 - temp.length()));
                out.append(temp);
            }
        }
        logln(out.toString());
    }

    //=========================================================================
    // tests
    //=========================================================================


    /**
     * @bug 4097779
     */
    public void TestBug4097779() {
        List<String> wordSelectionData = new ArrayList<String>(2);

        wordSelectionData.add("aa\u0300a");
        wordSelectionData.add(" ");

        generalIteratorTest(wordBreak, wordSelectionData);
    }

    /**
     * @bug 4098467
     */
    public void TestBug4098467Words() {
        List<String> wordSelectionData = new ArrayList<String>();

        // What follows is a string of Korean characters (I found it in the Yellow Pages
        // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
        // it correctly), first as precomposed syllables, and then as conjoining jamo.
        // Both sequences should be semantically identical and break the same way.
        // precomposed syllables...
        wordSelectionData.add("\uc0c1\ud56d");
        wordSelectionData.add(" ");
        wordSelectionData.add("\ud55c\uc778");
        wordSelectionData.add(" ");
        wordSelectionData.add("\uc5f0\ud569");
        wordSelectionData.add(" ");
        wordSelectionData.add("\uc7a5\ub85c\uad50\ud68c");
        wordSelectionData.add(" ");
        // conjoining jamo...
        wordSelectionData.add("\u1109\u1161\u11bc\u1112\u1161\u11bc");
        wordSelectionData.add(" ");
        wordSelectionData.add("\u1112\u1161\u11ab\u110b\u1175\u11ab");
        wordSelectionData.add(" ");
        wordSelectionData.add("\u110b\u1167\u11ab\u1112\u1161\u11b8");
        wordSelectionData.add(" ");
        wordSelectionData.add("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");
        wordSelectionData.add(" ");

        generalIteratorTest(wordBreak, wordSelectionData);
    }


    /**
     * @bug 4111338
     */
    public void TestBug4111338() {
        List<String> sentenceSelectionData = new ArrayList<String>();

        // test for bug #4111338: Don't break sentences at the boundary between CJK
        // and other letters
        sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"
                + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"
                + "\u611d\u57b6\u2510\u5d46\".\u2029");
        sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"
                + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"
                + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
        sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"
                + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"
                + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
        sentenceSelectionData.add("He said, \"I can go there.\"\u2029");

        generalIteratorTest(sentenceBreak, sentenceSelectionData);
    }


    /**
     * @bug 4143071
     */
    public void TestBug4143071() {
        List<String> sentenceSelectionData = new ArrayList<String>(3);

        // Make sure sentences that end with digits work right
        sentenceSelectionData.add("Today is the 27th of May, 1998. ");
        sentenceSelectionData.add("Tomorrow will be 28 May 1998. ");
        sentenceSelectionData.add("The day after will be the 30th.\u2029");

        generalIteratorTest(sentenceBreak, sentenceSelectionData);
    }

    /**
     * @bug 4152416
     */
    public void TestBug4152416() {
        List<String> sentenceSelectionData = new ArrayList<String>(2);

        // Make sure sentences ending with a capital letter are treated correctly
        sentenceSelectionData.add("The type of all primitive "
                + "<code>boolean</code> values accessed in the target VM. ");
        sentenceSelectionData.add("Calls to xxx will return an "
                + "implementor of this interface.\u2029");

        generalIteratorTest(sentenceBreak, sentenceSelectionData);
    }

    /**
     * @bug 4152117
     */
    public void TestBug4152117() {
        List<String> sentenceSelectionData = new ArrayList<String>(3);

        // Make sure sentence breaking is handling punctuation correctly
        // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE
        // IT DOESN'T CROP UP]
        sentenceSelectionData.add("Constructs a randomly generated "
                + "BigInteger, uniformly distributed over the range <tt>0</tt> "
                + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. ");
        sentenceSelectionData.add("The uniformity of the distribution "
                + "assumes that a fair source of random bits is provided in "
                + "<tt>rnd</tt>. ");
        sentenceSelectionData.add("Note that this constructor always "
                + "constructs a non-negative BigInteger.\u2029");

        generalIteratorTest(sentenceBreak, sentenceSelectionData);
    }

    /**
     * Line breaking around hyphens, spaces and the various line terminators.
     */
    public void TestLineBreak() {
        List<String> lineSelectionData = new ArrayList<String>();

        lineSelectionData.add("Multi-");
        lineSelectionData.add("Level ");
        lineSelectionData.add("example ");
        lineSelectionData.add("of ");
        lineSelectionData.add("a ");
        lineSelectionData.add("semi-");
        lineSelectionData.add("idiotic ");
        lineSelectionData.add("non-");
        lineSelectionData.add("sensical ");
        lineSelectionData.add("(non-");
        lineSelectionData.add("important) ");
        lineSelectionData.add("sentence. ");

        lineSelectionData.add("Hi ");
        lineSelectionData.add("Hello ");
        lineSelectionData.add("How\n");
        lineSelectionData.add("are\r");
        lineSelectionData.add("you\u2028");
        lineSelectionData.add("fine.\t");
        lineSelectionData.add("good. ");

        lineSelectionData.add("Now\r");
        lineSelectionData.add("is\n");
        lineSelectionData.add("the\r\n");
        lineSelectionData.add("time\n");
        lineSelectionData.add("\r");
        lineSelectionData.add("for\r");
        lineSelectionData.add("\r");
        lineSelectionData.add("all");

        generalIteratorTest(lineBreak, lineSelectionData);
    }

    /**
     * @bug 4068133
     */
    public void TestBug4068133() {
        List<String> lineSelectionData = new ArrayList<String>(9);

        lineSelectionData.add("\u96f6");
        lineSelectionData.add("\u4e00\u3002");
        lineSelectionData.add("\u4e8c\u3001");
        lineSelectionData.add("\u4e09\u3002\u3001");
        lineSelectionData.add("\u56db\u3001\u3002\u3001");
        lineSelectionData.add("\u4e94,");
        lineSelectionData.add("\u516d.");
        lineSelectionData.add("\u4e03.\u3001,\u3002");
        lineSelectionData.add("\u516b");

        generalIteratorTest(lineBreak, lineSelectionData);
    }

    /**
     * @bug 4086052
     */
    public void TestBug4086052() {
        List<String> lineSelectionData = new ArrayList<String>(1);

        // A second case, "foo" + U+FEFF + "bar", was already disabled in the
        // original source and stays out.
        lineSelectionData.add("foo\u00a0bar ");

        generalIteratorTest(lineBreak, lineSelectionData);
    }

    /**
     * @bug 4097920
     */
    public void TestBug4097920() {
        List<String> lineSelectionData = new ArrayList<String>(3);

        lineSelectionData.add("dog,cat,mouse ");
        lineSelectionData.add("(one)");
        lineSelectionData.add("(two)\n");
        generalIteratorTest(lineBreak, lineSelectionData);
    }



    /**
     * @bug 4117554
     */
    public void TestBug4117554Lines() {
        List<String> lineSelectionData = new ArrayList<String>(3);

        // Fullwidth .!? should be treated as postJwrd
        lineSelectionData.add("\u4e01\uff0e");
        lineSelectionData.add("\u4e02\uff01");
        lineSelectionData.add("\u4e03\uff1f");

        generalIteratorTest(lineBreak, lineSelectionData);
    }

    public void TestLettersAndDigits() {
        // a character sequence such as "X11" or "30F3" or "native2ascii" should
        // be kept together as a single word
        List<String> lineSelectionData = new ArrayList<String>(3);

        lineSelectionData.add("X11 ");
        lineSelectionData.add("30F3 ");
        lineSelectionData.add("native2ascii");

        generalIteratorTest(lineBreak, lineSelectionData);
    }


    private static final String graveS = "S\u0300";
    private static final String acuteBelowI = "i\u0317";
    private static final String acuteE = "e\u0301";
    private static final String circumflexA = "a\u0302";
    private static final String tildeE = "e\u0303";

    /**
     * Character breaking: base letters stay with their combining marks, and
     * CR LF is a single unit.
     */
    public void TestCharacterBreak() {
        List<String> characterSelectionData = new ArrayList<String>();

        characterSelectionData.add(graveS);
        characterSelectionData.add(acuteBelowI);
        characterSelectionData.add("m");
        characterSelectionData.add("p");
        characterSelectionData.add("l");
        characterSelectionData.add(acuteE);
        characterSelectionData.add(" ");
        characterSelectionData.add("s");
        characterSelectionData.add(circumflexA);
        characterSelectionData.add("m");
        characterSelectionData.add("p");
        characterSelectionData.add("l");
        characterSelectionData.add(tildeE);
        characterSelectionData.add(".");
        characterSelectionData.add("w");
        characterSelectionData.add(circumflexA);
        characterSelectionData.add("w");
        characterSelectionData.add("a");
        characterSelectionData.add("f");
        characterSelectionData.add("q");
        characterSelectionData.add("\n");
        characterSelectionData.add("\r");
        characterSelectionData.add("\r\n");
        characterSelectionData.add("\n");

        generalIteratorTest(characterBreak, characterSelectionData);
    }

    /**
     * @bug 4098467
     */
    public void TestBug4098467Characters() {
        List<String> characterSelectionData = new ArrayList<String>();

        // What follows is a string of Korean characters (I found it in the Yellow Pages
        // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
        // it correctly), first as precomposed syllables, and then as conjoining jamo.
        // Both sequences should be semantically identical and break the same way.
        // precomposed syllables...
        characterSelectionData.add("\uc0c1");
        characterSelectionData.add("\ud56d");
        characterSelectionData.add(" ");
        characterSelectionData.add("\ud55c");
        characterSelectionData.add("\uc778");
        characterSelectionData.add(" ");
        characterSelectionData.add("\uc5f0");
        characterSelectionData.add("\ud569");
        characterSelectionData.add(" ");
        characterSelectionData.add("\uc7a5");
        characterSelectionData.add("\ub85c");
        characterSelectionData.add("\uad50");
        characterSelectionData.add("\ud68c");
        characterSelectionData.add(" ");
        // conjoining jamo...
        characterSelectionData.add("\u1109\u1161\u11bc");
        characterSelectionData.add("\u1112\u1161\u11bc");
        characterSelectionData.add(" ");
        characterSelectionData.add("\u1112\u1161\u11ab");
        characterSelectionData.add("\u110b\u1175\u11ab");
        characterSelectionData.add(" ");
        characterSelectionData.add("\u110b\u1167\u11ab");
        characterSelectionData.add("\u1112\u1161\u11b8");
        characterSelectionData.add(" ");
        characterSelectionData.add("\u110c\u1161\u11bc");
        characterSelectionData.add("\u1105\u1169");
        characterSelectionData.add("\u1100\u116d");
        characterSelectionData.add("\u1112\u116c");

        generalIteratorTest(characterBreak, characterSelectionData);
    }

    public void TestTitleBreak()
    {
        List<String> titleData = new ArrayList<String>();
        titleData.add(" ");
        titleData.add("This ");
        titleData.add("is ");
        titleData.add("a ");
        titleData.add("simple ");
        titleData.add("sample ");
        titleData.add("sentence. ");
        titleData.add("This ");

        generalIteratorTest(titleBreak, titleData);
    }



    /*
     * @bug 4153072
     */
    public void TestBug4153072() {
        BreakIterator iter = BreakIterator.getWordInstance();
        String str = "...Hello, World!...";
        int begin = 3;
        int end = str.length() - 3;

        // The iterator only sees [begin, end); isBoundary() is expected to
        // throw for offsets before begin and to accept begin itself.
        iter.setText(new StringCharacterIterator(str, begin, end, begin));
        for (int index = -1; index < begin + 1; ++index) {
            try {
                iter.isBoundary(index);
                if (index < begin) {
                    errln("Didn't get exception with offset = " + index +
                                    " and begin index = " + begin);
                }
            }
            catch (IllegalArgumentException e) {
                if (index >= begin) {
                    errln("Got exception with offset = " + index +
                                    " and begin index = " + begin);
                }
            }
        }
    }


    public void TestBug4146175Lines() {
        List<String> lineSelectionData = new ArrayList<String>(2);

        // the fullwidth comma should stick to the preceding Japanese character
        lineSelectionData.add("\u7d42\uff0c");
        lineSelectionData.add("\u308f");

        generalIteratorTest(lineBreak, lineSelectionData);
    }

    private static final String cannedTestChars
        = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
        + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"
        + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"
        + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"
        + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"
        + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";

    public void TestSentenceInvariants()
    {
        BreakIterator e = BreakIterator.getSentenceInstance();
        doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");
    }

    public void TestEmptyString()
    {
        String text = "";
        List<String> x = new ArrayList<String>(1);
        x.add(text);

        generalIteratorTest(lineBreak, x);
    }

    public void TestGetAvailableLocales()
    {
        Locale[] locList = BreakIterator.getAvailableLocales();

        if (locList.length == 0) {
            errln("getAvailableLocales() returned an empty list!");
        }
        // I have no idea how to test this function...

        ULocale[] ulocList = BreakIterator.getAvailableULocales();
        if (ulocList.length == 0) {
            errln("getAvailableULocales() returned an empty list!");
        } else {
            logln("getAvailableULocales() returned " + ulocList.length + " locales");
        }
    }


    /**
     * @bug 4068137
     */
    public void TestEndBehavior()
    {
        String testString = "boo.";
        BreakIterator wb = BreakIterator.getWordInstance();
        wb.setText(testString);

        if (wb.first() != 0) {
            errln("Didn't get break at beginning of string.");
        }
        if (wb.next() != 3) {
            errln("Didn't get break before period in \"boo.\"");
        }
        // Short-circuit is intentional: next() is only called when the
        // iterator is not already sitting at the end of the string.
        if (wb.current() != 4 && wb.next() != 4) {
            errln("Didn't get break at end of string.");
        }
    }

    // The Following two tests are ported from ICU4C 1.8.1 [Richard/GCL]
    /**
     * Port From:   ICU4C v1.8.1 : textbounds : IntlTestTextBoundary
     * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp
     **/
    /**
     * test methods preceding, following and isBoundary
     **/
    public void TestPreceding() {
        String words3 = "aaa bbb ccc";
        BreakIterator e = BreakIterator.getWordInstance(Locale.getDefault());
        e.setText( words3 );
        e.first();
        int p1 = e.next();
        int p2 = e.next();
        int p3 = e.next();
        int p4 = e.next();

        int f = e.following(p2+1);
        int p = e.preceding(p2+1);
        if (f!=p3) {
            errln("IntlTestTextBoundary::TestPreceding: f!=p3");
        }
        if (p!=p2) {
            errln("IntlTestTextBoundary::TestPreceding: p!=p2");
        }

        if (p1+1!=p2) {
            errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
        }

        if (p3+1!=p4) {
            errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
        }

        if (!e.isBoundary(p2) || e.isBoundary(p2+1) || !e.isBoundary(p3))
        {
            errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
        }
    }


    /**
     * Bug 4450804
     */
    public void TestLineBreakContractions() {
        List<String> expected = new ArrayList<String>(7);
        expected.add("These ");
        expected.add("are ");
        expected.add("'foobles'. ");
        expected.add("Don't ");
        expected.add("you ");
        expected.add("like ");
        expected.add("them?");
        generalIteratorTest(lineBreak, expected);
    }

    /**
     * Ticket#5615: every available locale must yield an instance for every
     * break kind. Each locale/kind pair is tried separately so that one
     * failure does not hide the others.
     */
    public void TestT5615() {
        final int kindCount = 5; /* 5 = BreakIterator.KIND_COUNT */
        ULocale[] ulocales = BreakIterator.getAvailableULocales();
        for (int i = 0; i < ulocales.length; i++) {
            ULocale loc = ulocales[i];
            for (int type = 0; type < kindCount; ++type) {
                try {
                    BreakIterator brk = BreakIterator.getBreakInstance(loc, type);
                    if (brk == null) {
                        errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc);
                    }
                } catch (Exception e) {
                    // Report the exception itself: getMessage() may be null
                    // and omits the exception type.
                    errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc
                                    + " / exception: " + e);
                }
            }
        }
    }

    /*
     * Test case for Ticket#10721. BreakIterator factory method should throw NPE
     * when specified locale is null.
     */
    public void TestNullLocale() {
        Locale loc = null;
        ULocale uloc = null;

        @SuppressWarnings("unused")
        BreakIterator brk;

        // Character
        try {
            brk = BreakIterator.getCharacterInstance(loc);
            errln("getCharacterInstance((Locale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
        try {
            brk = BreakIterator.getCharacterInstance(uloc);
            errln("getCharacterInstance((ULocale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }

        // Line
        try {
            brk = BreakIterator.getLineInstance(loc);
            errln("getLineInstance((Locale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
        try {
            brk = BreakIterator.getLineInstance(uloc);
            errln("getLineInstance((ULocale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }

        // Sentence
        try {
            brk = BreakIterator.getSentenceInstance(loc);
            errln("getSentenceInstance((Locale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
        try {
            brk = BreakIterator.getSentenceInstance(uloc);
            errln("getSentenceInstance((ULocale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }

        // Title
        try {
            brk = BreakIterator.getTitleInstance(loc);
            errln("getTitleInstance((Locale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
        try {
            brk = BreakIterator.getTitleInstance(uloc);
            errln("getTitleInstance((ULocale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }

        // Word
        try {
            brk = BreakIterator.getWordInstance(loc);
            errln("getWordInstance((Locale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
        try {
            brk = BreakIterator.getWordInstance(uloc);
            errln("getWordInstance((ULocale)null) did not throw NPE.");
        } catch (NullPointerException e) { /* OK */ }
    }

    /**
     * Test FilteredBreakIteratorBuilder newly introduced
     */
    public void TestFilteredBreakIteratorBuilder() {
        FilteredBreakIteratorBuilder builder;
        BreakIterator baseBI;
        BreakIterator filteredBI;

        // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
        String text = "In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge.";
        String ABBR_MR = "Mr.";
        String ABBR_CAPT = "Capt.";

        // Scenario 1: empty builder, so no breaks are suppressed.
        {
            logln("Constructing empty builder\n");
            builder = FilteredBreakIteratorBuilder.createInstance();

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            logln("Testing:");
            filteredBI.setText(text);
            assertEquals("1st next", 20, filteredBI.next());
            assertEquals("1st next", 84, filteredBI.next());
            assertEquals("1st next", 90, filteredBI.next());
            assertEquals("1st next", 181, filteredBI.next());
            assertEquals("1st next", 278, filteredBI.next());
            filteredBI.first();
        }

        // Scenario 2: "Mr." is the only exception; the suppress/unsuppress
        // return values are checked along the way.
        {
            logln("Constructing empty builder\n");
            builder = FilteredBreakIteratorBuilder.createInstance();

            logln("Adding Mr. as an exception\n");

            assertEquals("2.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
            assertEquals("2.2 suppressBreakAfter", false, builder.suppressBreakAfter(ABBR_MR));
            assertEquals("2.3 unsuppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_MR));
            assertEquals("2.4 unsuppressBreakAfter", false, builder.unsuppressBreakAfter(ABBR_MR));
            assertEquals("2.5 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            logln("Testing:");
            filteredBI.setText(text);
            assertEquals("2nd next", 84, filteredBI.next());
            assertEquals("2nd next", 90, filteredBI.next());
            assertEquals("2nd next", 278, filteredBI.next());
            filteredBI.first();
        }


        // Scenario 3: both "Mr." and "Capt." are exceptions.
        {
            logln("Constructing empty builder\n");
            builder = FilteredBreakIteratorBuilder.createInstance();

            logln("Adding Mr. and Capt as an exception\n");
            assertEquals("3.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
            assertEquals("3.2 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_CAPT));

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            logln("Testing:");
            filteredBI.setText(text);
            assertEquals("3rd next", 84, filteredBI.next());
            assertEquals("3rd next", 278, filteredBI.next());
            filteredBI.first();
        }

        // Scenario 4: English builder with "Capt." removed from its exceptions.
        {
            logln("Constructing English builder\n");
            builder = FilteredBreakIteratorBuilder.createInstance(ULocale.ENGLISH);

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);

            logln("unsuppressing 'Capt'");
            assertEquals("4.1 unsuppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_CAPT));

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            if(filteredBI != null) {
                logln("Testing:");
                filteredBI.setText(text);
                assertEquals("4th next", 84, filteredBI.next());
                assertEquals("4th next", 90, filteredBI.next());
                assertEquals("4th next", 278, filteredBI.next());
                filteredBI.first();
            }
        }

        // Scenario 5: English builder, unmodified.
        {
            logln("Constructing English builder\n");
            builder = FilteredBreakIteratorBuilder.createInstance(ULocale.ENGLISH);

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            if(filteredBI != null) {
                logln("Testing:");
                filteredBI.setText(text);

                assertEquals("5th next", 84, filteredBI.next());
                assertEquals("5th next", 278, filteredBI.next());
                filteredBI.first();
            }
        }

        // Scenario 6: French builder over a French base iterator.
        {
            logln("Constructing French builder");
            builder = FilteredBreakIteratorBuilder.createInstance(ULocale.FRENCH);

            logln("Constructing base BI\n");
            baseBI = BreakIterator.getSentenceInstance(Locale.FRENCH);

            logln("Building new BI\n");
            filteredBI = builder.build(baseBI);

            if(filteredBI != null) {
                logln("Testing:");
                filteredBI.setText(text);
                assertEquals("6th next", 20, filteredBI.next());
                assertEquals("6th next", 84, filteredBI.next());
                filteredBI.first();
            }
        }
    }
}