1/* GENERATED SOURCE. DO NOT MODIFY. */
2/*
3 *******************************************************************************
4 * Copyright (C) 1996-2014, International Business Machines Corporation and    *
5 * others. All Rights Reserved.                                                *
6 *******************************************************************************
7 */
8package android.icu.dev.test.rbbi;
9
10import java.text.StringCharacterIterator;
11import java.util.ArrayList;
12import java.util.List;
13import java.util.Locale;
14
15import android.icu.dev.test.TestFmwk;
16import android.icu.text.BreakIterator;
17import android.icu.text.FilteredBreakIteratorBuilder;
18import android.icu.util.ULocale;
19import org.junit.runner.RunWith;
20import android.icu.junit.IcuTestFmwkRunner;
21
22@RunWith(IcuTestFmwkRunner.class)
23public class BreakIteratorTest extends TestFmwk
24{
25    private BreakIterator characterBreak;
26    private BreakIterator wordBreak;
27    private BreakIterator lineBreak;
28    private BreakIterator sentenceBreak;
29    private BreakIterator titleBreak;
30
31    public static void main(String[] args) throws Exception {
32        new BreakIteratorTest().run(args);
33    }
34    public BreakIteratorTest()
35    {
36
37    }
38    protected void init(){
39        characterBreak = BreakIterator.getCharacterInstance();
40        wordBreak = BreakIterator.getWordInstance();
41        lineBreak = BreakIterator.getLineInstance();
42        //logln("Creating sentence iterator...");
43        sentenceBreak = BreakIterator.getSentenceInstance();
44        //logln("Finished creating sentence iterator...");
45        titleBreak = BreakIterator.getTitleInstance();
46    }
47    //=========================================================================
48    // general test subroutines
49    //=========================================================================
50
51    private void generalIteratorTest(BreakIterator bi, List<String> expectedResult) {
52        StringBuffer buffer = new StringBuffer();
53        String text;
54        for (int i = 0; i < expectedResult.size(); i++) {
55            text = expectedResult.get(i);
56            buffer.append(text);
57        }
58        text = buffer.toString();
59
60        bi.setText(text);
61
62        List<String> nextResults = _testFirstAndNext(bi, text);
63        List<String> previousResults = _testLastAndPrevious(bi, text);
64
65        logln("comparing forward and backward...");
66        int errs = getErrorCount();
67        compareFragmentLists("forward iteration", "backward iteration", nextResults,
68                        previousResults);
69        if (getErrorCount() == errs) {
70            logln("comparing expected and actual...");
71            compareFragmentLists("expected result", "actual result", expectedResult,
72                            nextResults);
73        }
74
75        int[] boundaries = new int[expectedResult.size() + 3];
76        boundaries[0] = BreakIterator.DONE;
77        boundaries[1] = 0;
78        for (int i = 0; i < expectedResult.size(); i++)
79            boundaries[i + 2] = boundaries[i + 1] + (expectedResult.get(i)).
80                            length();
81        boundaries[boundaries.length - 1] = BreakIterator.DONE;
82
83        _testFollowing(bi, text, boundaries);
84        _testPreceding(bi, text, boundaries);
85        _testIsBoundary(bi, text, boundaries);
86
87        doMultipleSelectionTest(bi, text);
88    }
89
90    private List<String> _testFirstAndNext(BreakIterator bi, String text) {
91        int p = bi.first();
92        int lastP = p;
93        List<String> result = new ArrayList<String>();
94
95        if (p != 0)
96            errln("first() returned " + p + " instead of 0");
97        while (p != BreakIterator.DONE) {
98            p = bi.next();
99            if (p != BreakIterator.DONE) {
100                if (p <= lastP)
101                    errln("next() failed to move forward: next() on position "
102                                    + lastP + " yielded " + p);
103
104                result.add(text.substring(lastP, p));
105            }
106            else {
107                if (lastP != text.length())
108                    errln("next() returned DONE prematurely: offset was "
109                                    + lastP + " instead of " + text.length());
110            }
111            lastP = p;
112        }
113        return result;
114    }
115
116    private List<String> _testLastAndPrevious(BreakIterator bi, String text) {
117        int p = bi.last();
118        int lastP = p;
119        List<String> result = new ArrayList<String>();
120
121        if (p != text.length())
122            errln("last() returned " + p + " instead of " + text.length());
123        while (p != BreakIterator.DONE) {
124            p = bi.previous();
125            if (p != BreakIterator.DONE) {
126                if (p >= lastP)
127                    errln("previous() failed to move backward: previous() on position "
128                                    + lastP + " yielded " + p);
129
130                result.add(0, text.substring(p, lastP));
131            }
132            else {
133                if (lastP != 0)
134                    errln("previous() returned DONE prematurely: offset was "
135                                    + lastP + " instead of 0");
136            }
137            lastP = p;
138        }
139        return result;
140    }
141
142    private void compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2) {
143        int p1 = 0;
144        int p2 = 0;
145        String s1;
146        String s2;
147        int t1 = 0;
148        int t2 = 0;
149
150        while (p1 < f1.size() && p2 < f2.size()) {
151            s1 = f1.get(p1);
152            s2 = f2.get(p2);
153            t1 += s1.length();
154            t2 += s2.length();
155
156            if (s1.equals(s2)) {
157                debugLogln("   >" + s1 + "<");
158                ++p1;
159                ++p2;
160            }
161            else {
162                int tempT1 = t1;
163                int tempT2 = t2;
164                int tempP1 = p1;
165                int tempP2 = p2;
166
167                while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {
168                    while (tempT1 < tempT2 && tempP1 < f1.size()) {
169                        tempT1 += (f1.get(tempP1)).length();
170                        ++tempP1;
171                    }
172                    while (tempT2 < tempT1 && tempP2 < f2.size()) {
173                        tempT2 += (f2.get(tempP2)).length();
174                        ++tempP2;
175                    }
176                }
177                logln("*** " + f1Name + " has:");
178                while (p1 <= tempP1 && p1 < f1.size()) {
179                    s1 = f1.get(p1);
180                    t1 += s1.length();
181                    debugLogln(" *** >" + s1 + "<");
182                    ++p1;
183                }
184                logln("***** " + f2Name + " has:");
185                while (p2 <= tempP2 && p2 < f2.size()) {
186                    s2 = f2.get(p2);
187                    t2 += s2.length();
188                    debugLogln(" ***** >" + s2 + "<");
189                    ++p2;
190                }
191                errln("Discrepancy between " + f1Name + " and " + f2Name);
192            }
193        }
194    }
195
196    private void _testFollowing(BreakIterator bi, String text, int[] boundaries) {
197        logln("testFollowing():");
198        int p = 2;
199        for (int i = 0; i <= text.length(); i++) {
200            if (i == boundaries[p])
201                ++p;
202
203            int b = bi.following(i);
204            logln("bi.following(" + i + ") -> " + b);
205            if (b != boundaries[p])
206                errln("Wrong result from following() for " + i + ": expected " + boundaries[p]
207                                + ", got " + b);
208        }
209    }
210
211    private void _testPreceding(BreakIterator bi, String text, int[] boundaries) {
212        logln("testPreceding():");
213        int p = 0;
214        for (int i = 0; i <= text.length(); i++) {
215            int b = bi.preceding(i);
216            logln("bi.preceding(" + i + ") -> " + b);
217            if (b != boundaries[p])
218                errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]
219                                + ", got " + b);
220
221            if (i == boundaries[p + 1])
222                ++p;
223        }
224    }
225
226    private void _testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
227        logln("testIsBoundary():");
228        int p = 1;
229        boolean isB;
230        for (int i = 0; i <= text.length(); i++) {
231            isB = bi.isBoundary(i);
232            logln("bi.isBoundary(" + i + ") -> " + isB);
233
234            if (i == boundaries[p]) {
235                if (!isB)
236                    errln("Wrong result from isBoundary() for " + i + ": expected true, got false");
237                ++p;
238            }
239            else {
240                if (isB)
241                    errln("Wrong result from isBoundary() for " + i + ": expected false, got true");
242            }
243        }
244    }
245
246    private void doMultipleSelectionTest(BreakIterator iterator, String testText)
247    {
248        logln("Multiple selection test...");
249        BreakIterator testIterator = (BreakIterator)iterator.clone();
250        int offset = iterator.first();
251        int testOffset;
252        int count = 0;
253
254        do {
255            testOffset = testIterator.first();
256            testOffset = testIterator.next(count);
257            logln("next(" + count + ") -> " + testOffset);
258            if (offset != testOffset)
259                errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
260
261            if (offset != BreakIterator.DONE) {
262                count++;
263                offset = iterator.next();
264            }
265        } while (offset != BreakIterator.DONE);
266
267        // now do it backwards...
268        offset = iterator.last();
269        count = 0;
270
271        do {
272            testOffset = testIterator.last();
273            testOffset = testIterator.next(count);
274            logln("next(" + count + ") -> " + testOffset);
275            if (offset != testOffset)
276                errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
277
278            if (offset != BreakIterator.DONE) {
279                count--;
280                offset = iterator.previous();
281            }
282        } while (offset != BreakIterator.DONE);
283    }
284
285
286    private void doOtherInvariantTest(BreakIterator tb, String testChars)
287    {
288        StringBuffer work = new StringBuffer("a\r\na");
289        int errorCount = 0;
290
291        // a break should never occur between CR and LF
292        for (int i = 0; i < testChars.length(); i++) {
293            work.setCharAt(0, testChars.charAt(i));
294            for (int j = 0; j < testChars.length(); j++) {
295                work.setCharAt(3, testChars.charAt(j));
296                tb.setText(work.toString());
297                for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
298                    if (k == 2) {
299                        errln("Break between CR and LF in string U+" + Integer.toHexString(
300                                (int)(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
301                                (int)(work.charAt(3))));
302                        errorCount++;
303                        if (errorCount >= 75)
304                            return;
305                    }
306            }
307        }
308
309        // a break should never occur before a non-spacing mark, unless it's preceded
310        // by a line terminator
311        work.setLength(0);
312        work.append("aaaa");
313        for (int i = 0; i < testChars.length(); i++) {
314            char c = testChars.charAt(i);
315            if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003')
316                continue;
317            work.setCharAt(1, c);
318            for (int j = 0; j < testChars.length(); j++) {
319                c = testChars.charAt(j);
320                if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c)
321                        != Character.ENCLOSING_MARK)
322                    continue;
323                work.setCharAt(2, c);
324                tb.setText(work.toString());
325                for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
326                    if (k == 2) {
327                        errln("Break between U+" + Integer.toHexString((int)(work.charAt(1)))
328                                + " and U+" + Integer.toHexString((int)(work.charAt(2))));
329                        errorCount++;
330                        if (errorCount >= 75)
331                            return;
332                    }
333            }
334        }
335    }
336
337    public void debugLogln(String s) {
338        final String zeros = "0000";
339        String temp;
340        StringBuffer out = new StringBuffer();
341        for (int i = 0; i < s.length(); i++) {
342            char c = s.charAt(i);
343            if (c >= ' ' && c < '\u007f')
344                out.append(c);
345            else {
346                out.append("\\u");
347                temp = Integer.toHexString((int)c);
348                out.append(zeros.substring(0, 4 - temp.length()));
349                out.append(temp);
350            }
351        }
352        logln(out.toString());
353    }
354
355    //=========================================================================
356    // tests
357    //=========================================================================
358
359
360    /**
361     * @bug 4097779
362     */
363    public void TestBug4097779() {
364        List<String> wordSelectionData = new ArrayList<String>(2);
365
366        wordSelectionData.add("aa\u0300a");
367        wordSelectionData.add(" ");
368
369        generalIteratorTest(wordBreak, wordSelectionData);
370    }
371
372    /**
373     * @bug 4098467
374     */
375    public void TestBug4098467Words() {
376        List<String> wordSelectionData = new ArrayList<String>();
377
378        // What follows is a string of Korean characters (I found it in the Yellow Pages
379        // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
380        // it correctly), first as precomposed syllables, and then as conjoining jamo.
381        // Both sequences should be semantically identical and break the same way.
382        // precomposed syllables...
383        wordSelectionData.add("\uc0c1\ud56d");
384        wordSelectionData.add(" ");
385        wordSelectionData.add("\ud55c\uc778");
386        wordSelectionData.add(" ");
387        wordSelectionData.add("\uc5f0\ud569");
388        wordSelectionData.add(" ");
389        wordSelectionData.add("\uc7a5\ub85c\uad50\ud68c");
390        wordSelectionData.add(" ");
391        // conjoining jamo...
392        wordSelectionData.add("\u1109\u1161\u11bc\u1112\u1161\u11bc");
393        wordSelectionData.add(" ");
394        wordSelectionData.add("\u1112\u1161\u11ab\u110b\u1175\u11ab");
395        wordSelectionData.add(" ");
396        wordSelectionData.add("\u110b\u1167\u11ab\u1112\u1161\u11b8");
397        wordSelectionData.add(" ");
398        wordSelectionData.add("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");
399        wordSelectionData.add(" ");
400
401        generalIteratorTest(wordBreak, wordSelectionData);
402    }
403
404
405    /**
406     * @bug 4111338
407     */
408    public void TestBug4111338() {
409        List<String> sentenceSelectionData = new ArrayList<String>();
410
411        // test for bug #4111338: Don't break sentences at the boundary between CJK
412        // and other letters
413        sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"
414                + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"
415                + "\u611d\u57b6\u2510\u5d46\".\u2029");
416        sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"
417                + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"
418                + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
419        sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"
420                + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"
421                + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
422        sentenceSelectionData.add("He said, \"I can go there.\"\u2029");
423
424        generalIteratorTest(sentenceBreak, sentenceSelectionData);
425    }
426
427
428    /**
429     * @bug 4143071
430     */
431    public void TestBug4143071() {
432        List<String> sentenceSelectionData = new ArrayList<String>(3);
433
434        // Make sure sentences that end with digits work right
435        sentenceSelectionData.add("Today is the 27th of May, 1998.  ");
436        sentenceSelectionData.add("Tomorrow will be 28 May 1998.  ");
437        sentenceSelectionData.add("The day after will be the 30th.\u2029");
438
439        generalIteratorTest(sentenceBreak, sentenceSelectionData);
440    }
441
442    /**
443     * @bug 4152416
444     */
445    public void TestBug4152416() {
446        List<String> sentenceSelectionData = new ArrayList<String>(2);
447
448        // Make sure sentences ending with a capital letter are treated correctly
449        sentenceSelectionData.add("The type of all primitive "
450                + "<code>boolean</code> values accessed in the target VM.  ");
451        sentenceSelectionData.add("Calls to xxx will return an "
452                + "implementor of this interface.\u2029");
453
454        generalIteratorTest(sentenceBreak, sentenceSelectionData);
455    }
456
457    /**
458     * @bug 4152117
459     */
460    public void TestBug4152117() {
461        List<String> sentenceSelectionData = new ArrayList<String>(3);
462
463        // Make sure sentence breaking is handling punctuation correctly
464        // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE
465        // IT DOESN'T CROP UP]
466        sentenceSelectionData.add("Constructs a randomly generated "
467                + "BigInteger, uniformly distributed over the range <tt>0</tt> "
468                + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive.  ");
469        sentenceSelectionData.add("The uniformity of the distribution "
470                + "assumes that a fair source of random bits is provided in "
471                + "<tt>rnd</tt>.  ");
472        sentenceSelectionData.add("Note that this constructor always "
473                + "constructs a non-negative BigInteger.\u2029");
474
475        generalIteratorTest(sentenceBreak, sentenceSelectionData);
476    }
477
478    public void TestLineBreak() {
479        List<String> lineSelectionData = new ArrayList<String>();
480
481        lineSelectionData.add("Multi-");
482        lineSelectionData.add("Level ");
483        lineSelectionData.add("example ");
484        lineSelectionData.add("of ");
485        lineSelectionData.add("a ");
486        lineSelectionData.add("semi-");
487        lineSelectionData.add("idiotic ");
488        lineSelectionData.add("non-");
489        lineSelectionData.add("sensical ");
490        lineSelectionData.add("(non-");
491        lineSelectionData.add("important) ");
492        lineSelectionData.add("sentence. ");
493
494        lineSelectionData.add("Hi  ");
495        lineSelectionData.add("Hello ");
496        lineSelectionData.add("How\n");
497        lineSelectionData.add("are\r");
498        lineSelectionData.add("you\u2028");
499        lineSelectionData.add("fine.\t");
500        lineSelectionData.add("good.  ");
501
502        lineSelectionData.add("Now\r");
503        lineSelectionData.add("is\n");
504        lineSelectionData.add("the\r\n");
505        lineSelectionData.add("time\n");
506        lineSelectionData.add("\r");
507        lineSelectionData.add("for\r");
508        lineSelectionData.add("\r");
509        lineSelectionData.add("all");
510
511        generalIteratorTest(lineBreak, lineSelectionData);
512    }
513
514    /**
515     * @bug 4068133
516     */
517    public void TestBug4068133() {
518        List<String> lineSelectionData = new ArrayList<String>(9);
519
520        lineSelectionData.add("\u96f6");
521        lineSelectionData.add("\u4e00\u3002");
522        lineSelectionData.add("\u4e8c\u3001");
523        lineSelectionData.add("\u4e09\u3002\u3001");
524        lineSelectionData.add("\u56db\u3001\u3002\u3001");
525        lineSelectionData.add("\u4e94,");
526        lineSelectionData.add("\u516d.");
527        lineSelectionData.add("\u4e03.\u3001,\u3002");
528        lineSelectionData.add("\u516b");
529
530        generalIteratorTest(lineBreak, lineSelectionData);
531    }
532
533    /**
534     * @bug 4086052
535     */
536    public void TestBug4086052() {
537        List<String> lineSelectionData = new ArrayList<String>(1);
538
539        lineSelectionData.add("foo\u00a0bar ");
540//        lineSelectionData.addElement("foo\ufeffbar");
541
542        generalIteratorTest(lineBreak, lineSelectionData);
543    }
544
545    /**
546     * @bug 4097920
547     */
548    public void TestBug4097920() {
549        List<String> lineSelectionData = new ArrayList<String>(3);
550
551        lineSelectionData.add("dog,cat,mouse ");
552        lineSelectionData.add("(one)");
553        lineSelectionData.add("(two)\n");
554        generalIteratorTest(lineBreak, lineSelectionData);
555    }
556
557
558
559    /**
560     * @bug 4117554
561     */
562    public void TestBug4117554Lines() {
563        List<String> lineSelectionData = new ArrayList<String>(3);
564
565        // Fullwidth .!? should be treated as postJwrd
566        lineSelectionData.add("\u4e01\uff0e");
567        lineSelectionData.add("\u4e02\uff01");
568        lineSelectionData.add("\u4e03\uff1f");
569
570        generalIteratorTest(lineBreak, lineSelectionData);
571    }
572
573    public void TestLettersAndDigits() {
574        // a character sequence such as "X11" or "30F3" or "native2ascii" should
575        // be kept together as a single word
576        List<String> lineSelectionData = new ArrayList<String>(3);
577
578        lineSelectionData.add("X11 ");
579        lineSelectionData.add("30F3 ");
580        lineSelectionData.add("native2ascii");
581
582        generalIteratorTest(lineBreak, lineSelectionData);
583    }
584
585
586    private static final String graveS = "S\u0300";
587    private static final String acuteBelowI = "i\u0317";
588    private static final String acuteE = "e\u0301";
589    private static final String circumflexA = "a\u0302";
590    private static final String tildeE = "e\u0303";
591
592    public void TestCharacterBreak() {
593        List<String> characterSelectionData = new ArrayList<String>();
594
595        characterSelectionData.add(graveS);
596        characterSelectionData.add(acuteBelowI);
597        characterSelectionData.add("m");
598        characterSelectionData.add("p");
599        characterSelectionData.add("l");
600        characterSelectionData.add(acuteE);
601        characterSelectionData.add(" ");
602        characterSelectionData.add("s");
603        characterSelectionData.add(circumflexA);
604        characterSelectionData.add("m");
605        characterSelectionData.add("p");
606        characterSelectionData.add("l");
607        characterSelectionData.add(tildeE);
608        characterSelectionData.add(".");
609        characterSelectionData.add("w");
610        characterSelectionData.add(circumflexA);
611        characterSelectionData.add("w");
612        characterSelectionData.add("a");
613        characterSelectionData.add("f");
614        characterSelectionData.add("q");
615        characterSelectionData.add("\n");
616        characterSelectionData.add("\r");
617        characterSelectionData.add("\r\n");
618        characterSelectionData.add("\n");
619
620        generalIteratorTest(characterBreak, characterSelectionData);
621    }
622
623    /**
624     * @bug 4098467
625     */
626    public void TestBug4098467Characters() {
627        List<String> characterSelectionData = new ArrayList<String>();
628
629        // What follows is a string of Korean characters (I found it in the Yellow Pages
630        // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
631        // it correctly), first as precomposed syllables, and then as conjoining jamo.
632        // Both sequences should be semantically identical and break the same way.
633        // precomposed syllables...
634        characterSelectionData.add("\uc0c1");
635        characterSelectionData.add("\ud56d");
636        characterSelectionData.add(" ");
637        characterSelectionData.add("\ud55c");
638        characterSelectionData.add("\uc778");
639        characterSelectionData.add(" ");
640        characterSelectionData.add("\uc5f0");
641        characterSelectionData.add("\ud569");
642        characterSelectionData.add(" ");
643        characterSelectionData.add("\uc7a5");
644        characterSelectionData.add("\ub85c");
645        characterSelectionData.add("\uad50");
646        characterSelectionData.add("\ud68c");
647        characterSelectionData.add(" ");
648        // conjoining jamo...
649        characterSelectionData.add("\u1109\u1161\u11bc");
650        characterSelectionData.add("\u1112\u1161\u11bc");
651        characterSelectionData.add(" ");
652        characterSelectionData.add("\u1112\u1161\u11ab");
653        characterSelectionData.add("\u110b\u1175\u11ab");
654        characterSelectionData.add(" ");
655        characterSelectionData.add("\u110b\u1167\u11ab");
656        characterSelectionData.add("\u1112\u1161\u11b8");
657        characterSelectionData.add(" ");
658        characterSelectionData.add("\u110c\u1161\u11bc");
659        characterSelectionData.add("\u1105\u1169");
660        characterSelectionData.add("\u1100\u116d");
661        characterSelectionData.add("\u1112\u116c");
662
663        generalIteratorTest(characterBreak, characterSelectionData);
664    }
665
666    public void TestTitleBreak()
667    {
668        List<String> titleData = new ArrayList<String>();
669        titleData.add("   ");
670        titleData.add("This ");
671        titleData.add("is ");
672        titleData.add("a ");
673        titleData.add("simple ");
674        titleData.add("sample ");
675        titleData.add("sentence. ");
676        titleData.add("This ");
677
678        generalIteratorTest(titleBreak, titleData);
679    }
680
681
682
683    /*
684     * @bug 4153072
685     */
686    public void TestBug4153072() {
687        BreakIterator iter = BreakIterator.getWordInstance();
688        String str = "...Hello, World!...";
689        int begin = 3;
690        int end = str.length() - 3;
691        // not used boolean gotException = false;
692
693
694        iter.setText(new StringCharacterIterator(str, begin, end, begin));
695        for (int index = -1; index < begin + 1; ++index) {
696            try {
697                iter.isBoundary(index);
698                if (index < begin)
699                    errln("Didn't get exception with offset = " + index +
700                                    " and begin index = " + begin);
701            }
702            catch (IllegalArgumentException e) {
703                if (index >= begin)
704                    errln("Got exception with offset = " + index +
705                                    " and begin index = " + begin);
706            }
707        }
708    }
709
710
711    public void TestBug4146175Lines() {
712        List<String> lineSelectionData = new ArrayList<String>(2);
713
714        // the fullwidth comma should stick to the preceding Japanese character
715        lineSelectionData.add("\u7d42\uff0c");
716        lineSelectionData.add("\u308f");
717
718        generalIteratorTest(lineBreak, lineSelectionData);
719    }
720
721    private static final String cannedTestChars
722        = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
723        + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"
724        + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"
725        + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"
726        + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"
727        + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";
728
729    public void TestSentenceInvariants()
730    {
731        BreakIterator e = BreakIterator.getSentenceInstance();
732        doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");
733    }
734
735    public void TestEmptyString()
736    {
737        String text = "";
738        List<String> x = new ArrayList<String>(1);
739        x.add(text);
740
741        generalIteratorTest(lineBreak, x);
742    }
743
744    public void TestGetAvailableLocales()
745    {
746        Locale[] locList = BreakIterator.getAvailableLocales();
747
748        if (locList.length == 0)
749            errln("getAvailableLocales() returned an empty list!");
750        // I have no idea how to test this function...
751
752        android.icu.util.ULocale[] ulocList = BreakIterator.getAvailableULocales();
753        if (ulocList.length == 0) {
754            errln("getAvailableULocales() returned an empty list!");
755        } else {
756            logln("getAvailableULocales() returned " + ulocList.length + " locales");
757        }
758    }
759
760
761    /**
762     * @bug 4068137
763     */
764    public void TestEndBehavior()
765    {
766        String testString = "boo.";
767        BreakIterator wb = BreakIterator.getWordInstance();
768        wb.setText(testString);
769
770        if (wb.first() != 0)
771            errln("Didn't get break at beginning of string.");
772        if (wb.next() != 3)
773            errln("Didn't get break before period in \"boo.\"");
774        if (wb.current() != 4 && wb.next() != 4)
775            errln("Didn't get break at end of string.");
776    }
777
778    // The Following two tests are ported from ICU4C 1.8.1 [Richard/GCL]
779    /**
780     * Port From:   ICU4C v1.8.1 : textbounds : IntlTestTextBoundary
781     * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp
782     **/
783    /**
784     * test methods preceding, following and isBoundary
785     **/
786    public void TestPreceding() {
787        String words3 = "aaa bbb ccc";
788        BreakIterator e = BreakIterator.getWordInstance(Locale.getDefault());
789        e.setText( words3 );
790        e.first();
791        int p1 = e.next();
792        int p2 = e.next();
793        int p3 = e.next();
794        int p4 = e.next();
795
796        int f = e.following(p2+1);
797        int p = e.preceding(p2+1);
798        if (f!=p3)
799            errln("IntlTestTextBoundary::TestPreceding: f!=p3");
800        if (p!=p2)
801            errln("IntlTestTextBoundary::TestPreceding: p!=p2");
802
803        if (p1+1!=p2)
804            errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
805
806        if (p3+1!=p4)
807            errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
808
809        if (!e.isBoundary(p2) || e.isBoundary(p2+1) || !e.isBoundary(p3))
810        {
811            errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
812        }
813    }
814
815
816    /**
817     * Bug 4450804
818     */
819    public void TestLineBreakContractions() {
820        List<String> expected = new ArrayList<String>(7);
821        expected.add("These ");
822        expected.add("are ");
823        expected.add("'foobles'. ");
824        expected.add("Don't ");
825        expected.add("you ");
826        expected.add("like ");
827        expected.add("them?");
828        generalIteratorTest(lineBreak, expected);
829    }
830
831    /**
832     * Ticket#5615
833     */
834    public void TestT5615() {
835        android.icu.util.ULocale[] ulocales = BreakIterator.getAvailableULocales();
836        int type = 0;
837        android.icu.util.ULocale loc = null;
838        try {
839            for (int i = 0; i < ulocales.length; i++) {
840                loc = ulocales[i];
841                for (type = 0; type < 5 /* 5 = BreakIterator.KIND_COUNT */; ++type) {
842                    BreakIterator brk = BreakIterator.getBreakInstance(loc, type);
843                    if (brk == null) {
844                        errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc);
845                    }
846                }
847            }
848        } catch (Exception e) {
849            errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc + " / exception: " + e.getMessage());
850        }
851    }
852
853    /*
854     * Test case for Ticket#10721. BreakIterator factory method should throw NPE
855     * when specified locale is null.
856     */
857    public void TestNullLocale() {
858        Locale loc = null;
859        ULocale uloc = null;
860
861        @SuppressWarnings("unused")
862        BreakIterator brk;
863
864        // Character
865        try {
866            brk = BreakIterator.getCharacterInstance(loc);
867            errln("getCharacterInstance((Locale)null) did not throw NPE.");
868        } catch (NullPointerException e) { /* OK */ }
869        try {
870            brk = BreakIterator.getCharacterInstance(uloc);
871            errln("getCharacterInstance((ULocale)null) did not throw NPE.");
872        } catch (NullPointerException e) { /* OK */ }
873
874        // Line
875        try {
876            brk = BreakIterator.getLineInstance(loc);
877            errln("getLineInstance((Locale)null) did not throw NPE.");
878        } catch (NullPointerException e) { /* OK */ }
879        try {
880            brk = BreakIterator.getLineInstance(uloc);
881            errln("getLineInstance((ULocale)null) did not throw NPE.");
882        } catch (NullPointerException e) { /* OK */ }
883
884        // Sentence
885        try {
886            brk = BreakIterator.getSentenceInstance(loc);
887            errln("getSentenceInstance((Locale)null) did not throw NPE.");
888        } catch (NullPointerException e) { /* OK */ }
889        try {
890            brk = BreakIterator.getSentenceInstance(uloc);
891            errln("getSentenceInstance((ULocale)null) did not throw NPE.");
892        } catch (NullPointerException e) { /* OK */ }
893
894        // Title
895        try {
896            brk = BreakIterator.getTitleInstance(loc);
897            errln("getTitleInstance((Locale)null) did not throw NPE.");
898        } catch (NullPointerException e) { /* OK */ }
899        try {
900            brk = BreakIterator.getTitleInstance(uloc);
901            errln("getTitleInstance((ULocale)null) did not throw NPE.");
902        } catch (NullPointerException e) { /* OK */ }
903
904        // Word
905        try {
906            brk = BreakIterator.getWordInstance(loc);
907            errln("getWordInstance((Locale)null) did not throw NPE.");
908        } catch (NullPointerException e) { /* OK */ }
909        try {
910            brk = BreakIterator.getWordInstance(uloc);
911            errln("getWordInstance((ULocale)null) did not throw NPE.");
912        } catch (NullPointerException e) { /* OK */ }
913    }
914
915    /**
916     * Test FilteredBreakIteratorBuilder newly introduced
917     */
918    public void TestFilteredBreakIteratorBuilder() {
919        FilteredBreakIteratorBuilder builder;
920        BreakIterator baseBI;
921        BreakIterator filteredBI;
922
923        String text = "In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."; // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
924        String ABBR_MR = "Mr.";
925        String ABBR_CAPT = "Capt.";
926
927        {
928            logln("Constructing empty builder\n");
929            builder = FilteredBreakIteratorBuilder.createInstance();
930
931            logln("Constructing base BI\n");
932            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
933
934            logln("Building new BI\n");
935            filteredBI = builder.build(baseBI);
936
937            logln("Testing:");
938            filteredBI.setText(text);
939            assertEquals("1st next", 20, filteredBI.next());
940            assertEquals("1st next", 84, filteredBI.next());
941            assertEquals("1st next", 90, filteredBI.next());
942            assertEquals("1st next", 181, filteredBI.next());
943            assertEquals("1st next", 278, filteredBI.next());
944            filteredBI.first();
945        }
946
947        {
948            logln("Constructing empty builder\n");
949            builder = FilteredBreakIteratorBuilder.createInstance();
950
951            logln("Adding Mr. as an exception\n");
952
953            assertEquals("2.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
954            assertEquals("2.2 suppressBreakAfter", false, builder.suppressBreakAfter(ABBR_MR));
955            assertEquals("2.3 unsuppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_MR));
956            assertEquals("2.4 unsuppressBreakAfter", false, builder.unsuppressBreakAfter(ABBR_MR));
957            assertEquals("2.5 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
958
959            logln("Constructing base BI\n");
960            baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
961
962            logln("Building new BI\n");
963            filteredBI = builder.build(baseBI);
964
965            logln("Testing:");
966            filteredBI.setText(text);
967            assertEquals("2nd next", 84, filteredBI.next());
968            assertEquals("2nd next", 90, filteredBI.next());
969            assertEquals("2nd next", 278, filteredBI.next());
970            filteredBI.first();
971        }
972
973
974        {
975          logln("Constructing empty builder\n");
976          builder = FilteredBreakIteratorBuilder.createInstance();
977
978          logln("Adding Mr. and Capt as an exception\n");
979          assertEquals("3.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
980          assertEquals("3.2 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_CAPT));
981
982          logln("Constructing base BI\n");
983          baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
984
985          logln("Building new BI\n");
986          filteredBI = builder.build(baseBI);
987
988          logln("Testing:");
989          filteredBI.setText(text);
990          assertEquals("3rd next", 84, filteredBI.next());
991          assertEquals("3rd next", 278, filteredBI.next());
992          filteredBI.first();
993        }
994
995        {
996          logln("Constructing English builder\n");
997          builder = FilteredBreakIteratorBuilder.createInstance(ULocale.ENGLISH);
998
999          logln("Constructing base BI\n");
1000          baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
1001
1002          logln("unsuppressing 'Capt'");
1003          assertEquals("1st suppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_CAPT));
1004
1005          logln("Building new BI\n");
1006          filteredBI = builder.build(baseBI);
1007
1008          if(filteredBI != null) {
1009            logln("Testing:");
1010            filteredBI.setText(text);
1011            assertEquals("4th next", 84, filteredBI.next());
1012            assertEquals("4th next", 90, filteredBI.next());
1013            assertEquals("4th next", 278, filteredBI.next());
1014            filteredBI.first();
1015          }
1016        }
1017
1018        {
1019          logln("Constructing English builder\n");
1020          builder = FilteredBreakIteratorBuilder.createInstance(ULocale.ENGLISH);
1021
1022          logln("Constructing base BI\n");
1023          baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
1024
1025          logln("Building new BI\n");
1026          filteredBI = builder.build(baseBI);
1027
1028          if(filteredBI != null) {
1029            logln("Testing:");
1030            filteredBI.setText(text);
1031
1032            assertEquals("5th next", 84, filteredBI.next());
1033            assertEquals("5th next", 278, filteredBI.next());
1034            filteredBI.first();
1035          }
1036        }
1037
1038        {
1039          logln("Constructing French builder");
1040          builder = FilteredBreakIteratorBuilder.createInstance(ULocale.FRENCH);
1041
1042          logln("Constructing base BI\n");
1043          baseBI = BreakIterator.getSentenceInstance(Locale.FRENCH);
1044
1045          logln("Building new BI\n");
1046          filteredBI = builder.build(baseBI);
1047
1048          if(filteredBI != null) {
1049            logln("Testing:");
1050            filteredBI.setText(text);
1051            assertEquals("6th next", 20, filteredBI.next());
1052            assertEquals("6th next", 84, filteredBI.next());
1053            filteredBI.first();
1054          }
1055        }
1056    }
1057}
1058