1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * Copyright (c) 1999-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 *   Date        Name        Description
8 *   12/14/99    Madhu        Creation.
9 *   01/12/2000  Madhu        updated for changed API
10 ********************************************************************/
11
12#include "unicode/utypes.h"
13
14#if !UCONFIG_NO_BREAK_ITERATION
15
16#include "unicode/uchar.h"
17#include "intltest.h"
18#include "unicode/rbbi.h"
19#include "unicode/schriter.h"
20#include "rbbiapts.h"
21#include "rbbidata.h"
22#include "cstring.h"
23#include "ubrkimpl.h"
24#include "unicode/locid.h"
25#include "unicode/ustring.h"
26#include "unicode/utext.h"
27#include "cmemory.h"
28#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
29#include "unicode/filteredbrk.h"
30#include <stdio.h> // for sprintf
31#endif
32/**
33 * API Test the RuleBasedBreakIterator class
34 */
35
36
// TEST_ASSERT_SUCCESS(status)
//     When U_FAILURE(status) is true, calls dataerrln() with the current file,
//     line and the ICU error name of `status`. The macro expands to a
//     brace-enclosed block and does not return from the calling test.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}

// TEST_ASSERT(expr)
//     When `expr` compares equal to FALSE, calls errln() with the current file,
//     line and the stringified text of `expr`. Because the expression text is
//     part of the message, renaming a variable used inside a TEST_ASSERT
//     changes the diagnostic output.
#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
    errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
42
43void RBBIAPITest::TestCloneEquals()
44{
45
46    UErrorCode status=U_ZERO_ERROR;
47    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
48    RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
49    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
50    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
51    if(U_FAILURE(status)){
52        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
53        return;
54    }
55
56
57    UnicodeString testString="Testing word break iterators's clone() and equals()";
58    bi1->setText(testString);
59    bi2->setText(testString);
60    biequal->setText(testString);
61
62    bi3->setText("hello");
63
64    logln((UnicodeString)"Testing equals()");
65
66    logln((UnicodeString)"Testing == and !=");
67    UBool b = (*bi1 != *biequal);
68    b |= *bi1 == *bi2;
69    b |= *bi1 == *bi3;
70    if (b) {
71        errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
72    }
73
74    if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
75        errln((UnicodeString)"ERROR:2 RBBI's == and != operator  failed.");
76
77
78    // Quick test of RulesBasedBreakIterator assignment -
79    // Check that
80    //    two different iterators are !=
81    //    they are == after assignment
82    //    source and dest iterator produce the same next() after assignment.
83    //    deleting one doesn't disable the other.
84    logln("Testing assignment");
85    RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
86    if(U_FAILURE(status)){
87        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
88        return;
89    }
90
91    RuleBasedBreakIterator biDefault, biDefault2;
92    if(U_FAILURE(status)){
93        errln((UnicodeString)"FAIL : in construction of default iterator");
94        return;
95    }
96    if (biDefault == *bix) {
97        errln((UnicodeString)"ERROR: iterators should not compare ==");
98        return;
99    }
100    if (biDefault != biDefault2) {
101        errln((UnicodeString)"ERROR: iterators should compare ==");
102        return;
103    }
104
105
106    UnicodeString   HelloString("Hello Kitty");
107    bix->setText(HelloString);
108    if (*bix == *bi2) {
109        errln(UnicodeString("ERROR: strings should not be equal before assignment."));
110    }
111    *bix = *bi2;
112    if (*bix != *bi2) {
113        errln(UnicodeString("ERROR: strings should be equal before assignment."));
114    }
115
116    int bixnext = bix->next();
117    int bi2next = bi2->next();
118    if (! (bixnext == bi2next && bixnext == 7)) {
119        errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
120    }
121    delete bix;
122    if (bi2->next() != 8) {
123        errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
124    }
125
126
127
128    logln((UnicodeString)"Testing clone()");
129    RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
130    RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
131
132    if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
133      *bi1clone == *bi3 || *bi1clone == *bi2)
134        errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
135
136    if(*bi2clone == *bi1 || *bi2clone == *biequal ||
137       *bi2clone == *bi3 || *bi2clone != *bi2)
138        errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
139
140    if(bi1->getText() != bi1clone->getText()   ||
141       bi2clone->getText() != bi2->getText()   ||
142       *bi2clone == *bi1clone )
143        errln((UnicodeString)"ERROR: RBBI's clone() method failed");
144
145    delete bi1clone;
146    delete bi2clone;
147    delete bi1;
148    delete bi3;
149    delete bi2;
150    delete biequal;
151}
152
153void RBBIAPITest::TestBoilerPlate()
154{
155    UErrorCode status = U_ZERO_ERROR;
156    BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
157    BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
158    if (U_FAILURE(status)) {
159        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
160        return;
161    }
162    if(*a!=*b){
163        errln("Failed: boilerplate method operator!= does not return correct results");
164    }
165    // Japanese word break iterators are identical to root with
166    // a dictionary-based break iterator
167    BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
168    BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
169    if(c && d){
170        if(*c!=*d){
171            errln("Failed: boilerplate method operator== does not return correct results");
172        }
173    }else{
174        errln("creation of break iterator failed");
175    }
176    delete a;
177    delete b;
178    delete c;
179    delete d;
180}
181
182void RBBIAPITest::TestgetRules()
183{
184    UErrorCode status=U_ZERO_ERROR;
185
186    RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
187    RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
188    if(U_FAILURE(status)){
189        errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
190        delete bi1;
191        delete bi2;
192        return;
193    }
194
195
196
197    logln((UnicodeString)"Testing toString()");
198
199    bi1->setText((UnicodeString)"Hello there");
200
201    RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
202
203    UnicodeString temp=bi1->getRules();
204    UnicodeString temp2=bi2->getRules();
205    UnicodeString temp3=bi3->getRules();
206    if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
207        errln((UnicodeString)"ERROR: error in getRules() method");
208
209    delete bi1;
210    delete bi2;
211    delete bi3;
212}
213void RBBIAPITest::TestHashCode()
214{
215    UErrorCode status=U_ZERO_ERROR;
216    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
217    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
218    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
219    if(U_FAILURE(status)){
220        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
221        delete bi1;
222        delete bi2;
223        delete bi3;
224        return;
225    }
226
227
228    logln((UnicodeString)"Testing hashCode()");
229
230    bi1->setText((UnicodeString)"Hash code");
231    bi2->setText((UnicodeString)"Hash code");
232    bi3->setText((UnicodeString)"Hash code");
233
234    RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
235    RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
236
237    if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
238        bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
239        errln((UnicodeString)"ERROR: identical objects have different hashcodes");
240
241    if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
242        bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
243        errln((UnicodeString)"ERROR: different objects have same hashcodes");
244
245    delete bi1clone;
246    delete bi2clone;
247    delete bi1;
248    delete bi2;
249    delete bi3;
250
251}
//
//  TestGetSetAdoptText
//      Exercises the text accessors of RuleBasedBreakIterator:
//        - setText(UnicodeString) / getText(), and the position reset after setText
//        - adoptText(CharacterIterator*)
//        - setText(UText*) followed by forward and backward iteration
//        - getUText()
//      The steps depend on the iterator state left by the preceding steps,
//      so their order matters.
//
void RBBIAPITest::TestGetSetAdoptText()
{
    logln((UnicodeString)"Testing getText setText ");
    IcuTestErrorCode status(*this, "TestGetSetAdoptText");
    UnicodeString str1="first string.";
    UnicodeString str2="Second string.";
    LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
    LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
    if(status.isFailure()){
        errcheckln(status, "Fail : in construction - %s", status.errorName());
            return;
    }


    // Character iterators handed to adoptText() further down. Nothing in this
    // function deletes them - presumably adoptText() takes ownership (confirm
    // against the BreakIterator API documentation).
    CharacterIterator* text1= new StringCharacterIterator(str1);
    CharacterIterator* text1Clone = text1->clone();
    CharacterIterator* text2= new StringCharacterIterator(str2);
    CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"

    // setText() must expose the new text through getText() and leave the
    // iterator positioned at 0.
    wordIter1->setText(str1);
    CharacterIterator *tci = &wordIter1->getText();
    UnicodeString      tstr;
    tci->getText(tstr);
    TEST_ASSERT(tstr == str1);
    if(wordIter1->current() != 0)
        errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");

    // Move away from the start so the reset by the next setText() is observable.
    wordIter1->next(2);

    wordIter1->setText(str2);
    if(wordIter1->current() != 0)
        errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");


    // charIter1 now holds str1 while wordIter1 holds str2; their texts must differ.
    charIter1->adoptText(text1Clone);
    TEST_ASSERT(wordIter1->getText() != charIter1->getText());
    tci = &wordIter1->getText();
    tci->getText(tstr);
    TEST_ASSERT(tstr == str2);
    tci = &charIter1->getText();
    tci->getText(tstr);
    TEST_ASSERT(tstr == str1);


    // adoptText() on a clone: getText() must compare equal to the adopted iterator.
    LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
    rb->adoptText(text1);
    if(rb->getText() != *text1)
        errln((UnicodeString)"ERROR:1 error in adoptText ");
    rb->adoptText(text2);
    if(rb->getText() != *text2)
        errln((UnicodeString)"ERROR:2 error in adoptText ");

    // Adopt where iterator range is less than the entire original source string.
    //   (With the change of the break engine to working with UText internally,
    //    CharacterIterators starting at positions other than zero are not supported)
    rb->adoptText(text3);
    TEST_ASSERT(rb->preceding(2) == 0);
    TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
    //if(rb->preceding(2) != 3) {
    //    errln((UnicodeString)"ERROR:3 error in adoptText ");
    //}
    //if(rb->following(11) != BreakIterator::DONE) {
    //    errln((UnicodeString)"ERROR:4 error in adoptText ");
    //}

    // UText API
    //
    //   Quick test to see if UText is working at all.
    //
    const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
    const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
    //                012345678901

    status.reset();
    LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
    wordIter1->setText(ut.getAlias(), status);
    TEST_ASSERT_SUCCESS(status);

    // Forward iteration over "hello world": expected breaks at 0, 5, 6, 11, then DONE.
    int32_t pos;
    pos = wordIter1->first();
    TEST_ASSERT(pos==0);
    pos = wordIter1->next();
    TEST_ASSERT(pos==5);
    pos = wordIter1->next();
    TEST_ASSERT(pos==6);
    pos = wordIter1->next();
    TEST_ASSERT(pos==11);
    pos = wordIter1->next();
    TEST_ASSERT(pos==UBRK_DONE);

    status.reset();
    LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
    TEST_ASSERT_SUCCESS(status);
    wordIter1->setText(ut2.getAlias(), status);
    TEST_ASSERT_SUCCESS(status);

    // Forward iteration over "see ya": expected breaks at 0, 3, 4.
    pos = wordIter1->first();
    TEST_ASSERT(pos==0);
    pos = wordIter1->next();
    TEST_ASSERT(pos==3);
    pos = wordIter1->next();
    TEST_ASSERT(pos==4);

    // Backward iteration from the end: expected 6, 4, 3, 0, then DONE.
    pos = wordIter1->last();
    TEST_ASSERT(pos==6);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==4);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==3);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==0);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==UBRK_DONE);

    // getUText(): only checks that the call succeeds when given an existing
    // UText (opened here on an empty string) as the fill-in argument.
    status.reset();
    UnicodeString sEmpty;
    LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
    wordIter1->getUText(gut2.getAlias(), status);
    TEST_ASSERT_SUCCESS(status);
    status.reset();
}
373
374
375void RBBIAPITest::TestIteration()
376{
377    // This test just verifies that the API is present.
378    // Testing for correct operation of the break rules happens elsewhere.
379
380    UErrorCode status=U_ZERO_ERROR;
381    RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
382    if (U_FAILURE(status) || bi == NULL)  {
383        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
384    }
385    delete bi;
386
387    status=U_ZERO_ERROR;
388    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
389    if (U_FAILURE(status) || bi == NULL)  {
390        errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
391    }
392    delete bi;
393
394    status=U_ZERO_ERROR;
395    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
396    if (U_FAILURE(status) || bi == NULL)  {
397        errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
398    }
399    delete bi;
400
401    status=U_ZERO_ERROR;
402    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
403    if (U_FAILURE(status) || bi == NULL)  {
404        errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
405    }
406    delete bi;
407
408    status=U_ZERO_ERROR;
409    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
410    if (U_FAILURE(status) || bi == NULL)  {
411        errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
412    }
413    delete bi;
414
415    status=U_ZERO_ERROR;
416    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
417    if (U_FAILURE(status) || bi == NULL)  {
418        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
419        return;   // Skip the rest of these tests.
420    }
421
422
423    UnicodeString testString="0123456789";
424    bi->setText(testString);
425
426    int32_t i;
427    i = bi->first();
428    if (i != 0) {
429        errln("Incorrect value from bi->first().  Expected 0, got %d.", i);
430    }
431
432    i = bi->last();
433    if (i != 10) {
434        errln("Incorrect value from bi->last().  Expected 10, got %d", i);
435    }
436
437    //
438    // Previous
439    //
440    bi->last();
441    i = bi->previous();
442    if (i != 9) {
443        errln("Incorrect value from bi->last() at line %d.  Expected 9, got %d", __LINE__, i);
444    }
445
446
447    bi->first();
448    i = bi->previous();
449    if (i != BreakIterator::DONE) {
450        errln("Incorrect value from bi->previous() at line %d.  Expected DONE, got %d", __LINE__, i);
451    }
452
453    //
454    // next()
455    //
456    bi->first();
457    i = bi->next();
458    if (i != 1) {
459        errln("Incorrect value from bi->next() at line %d.  Expected 1, got %d", __LINE__, i);
460    }
461
462    bi->last();
463    i = bi->next();
464    if (i != BreakIterator::DONE) {
465        errln("Incorrect value from bi->next() at line %d.  Expected DONE, got %d", __LINE__, i);
466    }
467
468
469    //
470    //  current()
471    //
472    bi->first();
473    i = bi->current();
474    if (i != 0) {
475        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
476    }
477
478    bi->next();
479    i = bi->current();
480    if (i != 1) {
481        errln("Incorrect value from bi->previous() at line %d.  Expected 1, got %d", __LINE__, i);
482    }
483
484    bi->last();
485    bi->next();
486    i = bi->current();
487    if (i != 10) {
488        errln("Incorrect value from bi->previous() at line %d.  Expected 10, got %d", __LINE__, i);
489    }
490
491    bi->first();
492    bi->previous();
493    i = bi->current();
494    if (i != 0) {
495        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
496    }
497
498
499    //
500    // Following()
501    //
502    i = bi->following(4);
503    if (i != 5) {
504        errln("Incorrect value from bi->following() at line %d.  Expected 5, got %d", __LINE__, i);
505    }
506
507    i = bi->following(9);
508    if (i != 10) {
509        errln("Incorrect value from bi->following() at line %d.  Expected 10, got %d", __LINE__, i);
510    }
511
512    i = bi->following(10);
513    if (i != BreakIterator::DONE) {
514        errln("Incorrect value from bi->following() at line %d.  Expected DONE, got %d", __LINE__, i);
515    }
516
517
518    //
519    // Preceding
520    //
521    i = bi->preceding(4);
522    if (i != 3) {
523        errln("Incorrect value from bi->preceding() at line %d.  Expected 3, got %d", __LINE__, i);
524    }
525
526    i = bi->preceding(10);
527    if (i != 9) {
528        errln("Incorrect value from bi->preceding() at line %d.  Expected 9, got %d", __LINE__, i);
529    }
530
531    i = bi->preceding(1);
532    if (i != 0) {
533        errln("Incorrect value from bi->preceding() at line %d.  Expected 0, got %d", __LINE__, i);
534    }
535
536    i = bi->preceding(0);
537    if (i != BreakIterator::DONE) {
538        errln("Incorrect value from bi->preceding() at line %d.  Expected DONE, got %d", __LINE__, i);
539    }
540
541
542    //
543    // isBoundary()
544    //
545    bi->first();
546    if (bi->isBoundary(3) != TRUE) {
547        errln("Incorrect value from bi->isBoudary() at line %d.  Expected TRUE, got FALSE", __LINE__, i);
548    }
549    i = bi->current();
550    if (i != 3) {
551        errln("Incorrect value from bi->current() at line %d.  Expected 3, got %d", __LINE__, i);
552    }
553
554
555    if (bi->isBoundary(11) != FALSE) {
556        errln("Incorrect value from bi->isBoudary() at line %d.  Expected FALSE, got TRUE", __LINE__, i);
557    }
558    i = bi->current();
559    if (i != 10) {
560        errln("Incorrect value from bi->current() at line %d.  Expected 10, got %d", __LINE__, i);
561    }
562
563    //
564    // next(n)
565    //
566    bi->first();
567    i = bi->next(4);
568    if (i != 4) {
569        errln("Incorrect value from bi->next() at line %d.  Expected 4, got %d", __LINE__, i);
570    }
571
572    i = bi->next(6);
573    if (i != 10) {
574        errln("Incorrect value from bi->next() at line %d.  Expected 10, got %d", __LINE__, i);
575    }
576
577    bi->first();
578    i = bi->next(11);
579    if (i != BreakIterator::DONE) {
580        errln("Incorrect value from bi->next() at line %d.  Expected BreakIterator::DONE, got %d", __LINE__, i);
581    }
582
583    delete bi;
584
585}
586
587
588
589
590
591
592void RBBIAPITest::TestBuilder() {
593     UnicodeString rulesString1 = "$Letters = [:L:];\n"
594                                  "$Numbers = [:N:];\n"
595                                  "$Letters+;\n"
596                                  "$Numbers+;\n"
597                                  "[^$Letters $Numbers];\n"
598                                  "!.*;\n";
599     UnicodeString testString1  = "abc123..abc";
600                                // 01234567890
601     int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
602     UErrorCode status=U_ZERO_ERROR;
603     UParseError    parseError;
604
605     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
606     if(U_FAILURE(status)) {
607         dataerrln("Fail : in construction - %s", u_errorName(status));
608     } else {
609         bi->setText(testString1);
610         doBoundaryTest(*bi, testString1, bounds1);
611     }
612     delete bi;
613}
614
615
616//
617//  TestQuoteGrouping
618//       Single quotes within rules imply a grouping, so that a modifier
619//       following the quoted text (* or +) applies to all of the quoted chars.
620//
621void RBBIAPITest::TestQuoteGrouping() {
622     UnicodeString rulesString1 = "#Here comes the rule...\n"
623                                  "'$@!'*;\n"   //  (\$\@\!)*
624                                  ".;\n";
625
626     UnicodeString testString1  = "$@!$@!X$@!!X";
627                                // 0123456789012
628     int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
629     UErrorCode status=U_ZERO_ERROR;
630     UParseError    parseError;
631
632     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
633     if(U_FAILURE(status)) {
634         dataerrln("Fail : in construction - %s", u_errorName(status));
635     } else {
636         bi->setText(testString1);
637         doBoundaryTest(*bi, testString1, bounds1);
638     }
639     delete bi;
640}
641
642//
643//  TestRuleStatus
644//      Test word break rule status constants.
645//
646void RBBIAPITest::TestRuleStatus() {
647     UChar str[30];
648     //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
649     // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
650     u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
651              // 012345678901234567  8      9    0
652              //                     Katakana
653                str, 30);
654     UnicodeString testString1(str);
655     int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
656     int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
657                          UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
658                          UBRK_WORD_IDEO,     UBRK_WORD_NONE};
659
660     int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
661                          UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
662                          UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
663
664     UErrorCode status=U_ZERO_ERROR;
665
666     BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
667     if(U_FAILURE(status)) {
668         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
669     } else {
670         bi->setText(testString1);
671         // First test that the breaks are in the right spots.
672         doBoundaryTest(*bi, testString1, bounds1);
673
674         // Then go back and check tag values
675         int32_t i = 0;
676         int32_t pos, tag;
677         for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
678             if (pos != bounds1[i]) {
679                 errln("FAIL: unexpected word break at postion %d", pos);
680                 break;
681             }
682             tag = bi->getRuleStatus();
683             if (tag < tag_lo[i] || tag >= tag_hi[i]) {
684                 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
685                 break;
686             }
687
688             // Check that we get the same tag values from getRuleStatusVec()
689             int32_t vec[10];
690             int t = bi->getRuleStatusVec(vec, 10, status);
691             TEST_ASSERT_SUCCESS(status);
692             TEST_ASSERT(t==1);
693             TEST_ASSERT(vec[0] == tag);
694         }
695     }
696     delete bi;
697
698     // Now test line break status.  This test mostly is to confirm that the status constants
699     //                              are correctly declared in the header.
700     testString1 =   "test line. \n";
701     // break type    s    s     h
702
703     bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
704     if(U_FAILURE(status)) {
705         errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
706     } else {
707         int32_t i = 0;
708         int32_t pos, tag;
709         UBool   success;
710
711         bi->setText(testString1);
712         pos = bi->current();
713         tag = bi->getRuleStatus();
714         for (i=0; i<3; i++) {
715             switch (i) {
716             case 0:
717                 success = pos==0  && tag==UBRK_LINE_SOFT; break;
718             case 1:
719                 success = pos==5  && tag==UBRK_LINE_SOFT; break;
720             case 2:
721                 success = pos==12 && tag==UBRK_LINE_HARD; break;
722             default:
723                 success = FALSE; break;
724             }
725             if (success == FALSE) {
726                 errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
727                     i, pos, tag);
728                 break;
729             }
730             pos = bi->next();
731             tag = bi->getRuleStatus();
732         }
733         if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
734             UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
735             (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
736             errln("UBRK_LINE_* constants from header are inconsistent.");
737         }
738     }
739     delete bi;
740
741}
742
743
744//
745//  TestRuleStatusVec
746//      Test the vector form of  break rule status.
747//
748void RBBIAPITest::TestRuleStatusVec() {
749    UnicodeString rulesString(   "[A-N]{100}; \n"
750                                 "[a-w]{200}; \n"
751                                 "[\\p{L}]{300}; \n"
752                                 "[\\p{N}]{400}; \n"
753                                 "[0-5]{500}; \n"
754                                  "!.*;\n", -1, US_INV);
755     UnicodeString testString1  = "Aapz5?";
756     int32_t  statusVals[10];
757     int32_t  numStatuses;
758     int32_t  pos;
759
760     UErrorCode status=U_ZERO_ERROR;
761     UParseError    parseError;
762
763     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
764     if (U_FAILURE(status)) {
765         dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
766     } else {
767         bi->setText(testString1);
768
769         // A
770         pos = bi->next();
771         TEST_ASSERT(pos==1);
772         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
773         TEST_ASSERT_SUCCESS(status);
774         TEST_ASSERT(numStatuses == 2);
775         TEST_ASSERT(statusVals[0] == 100);
776         TEST_ASSERT(statusVals[1] == 300);
777
778         // a
779         pos = bi->next();
780         TEST_ASSERT(pos==2);
781         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
782         TEST_ASSERT_SUCCESS(status);
783         TEST_ASSERT(numStatuses == 2);
784         TEST_ASSERT(statusVals[0] == 200);
785         TEST_ASSERT(statusVals[1] == 300);
786
787         // p
788         pos = bi->next();
789         TEST_ASSERT(pos==3);
790         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
791         TEST_ASSERT_SUCCESS(status);
792         TEST_ASSERT(numStatuses == 2);
793         TEST_ASSERT(statusVals[0] == 200);
794         TEST_ASSERT(statusVals[1] == 300);
795
796         // z
797         pos = bi->next();
798         TEST_ASSERT(pos==4);
799         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
800         TEST_ASSERT_SUCCESS(status);
801         TEST_ASSERT(numStatuses == 1);
802         TEST_ASSERT(statusVals[0] == 300);
803
804         // 5
805         pos = bi->next();
806         TEST_ASSERT(pos==5);
807         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
808         TEST_ASSERT_SUCCESS(status);
809         TEST_ASSERT(numStatuses == 2);
810         TEST_ASSERT(statusVals[0] == 400);
811         TEST_ASSERT(statusVals[1] == 500);
812
813         // ?
814         pos = bi->next();
815         TEST_ASSERT(pos==6);
816         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
817         TEST_ASSERT_SUCCESS(status);
818         TEST_ASSERT(numStatuses == 1);
819         TEST_ASSERT(statusVals[0] == 0);
820
821         //
822         //  Check buffer overflow error handling.   Char == A
823         //
824         bi->first();
825         pos = bi->next();
826         TEST_ASSERT(pos==1);
827         memset(statusVals, -1, sizeof(statusVals));
828         numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
829         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
830         TEST_ASSERT(numStatuses == 2);
831         TEST_ASSERT(statusVals[0] == -1);
832
833         status = U_ZERO_ERROR;
834         memset(statusVals, -1, sizeof(statusVals));
835         numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
836         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
837         TEST_ASSERT(numStatuses == 2);
838         TEST_ASSERT(statusVals[0] == 100);
839         TEST_ASSERT(statusVals[1] == -1);
840
841         status = U_ZERO_ERROR;
842         memset(statusVals, -1, sizeof(statusVals));
843         numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
844         TEST_ASSERT_SUCCESS(status);
845         TEST_ASSERT(numStatuses == 2);
846         TEST_ASSERT(statusVals[0] == 100);
847         TEST_ASSERT(statusVals[1] == 300);
848         TEST_ASSERT(statusVals[2] == -1);
849     }
850     delete bi;
851
852}
853
854//
855//   Bug 2190 Regression test.   Builder crash on rule consisting of only a
856//                               $variable reference
857void RBBIAPITest::TestBug2190() {
858     UnicodeString rulesString1 = "$aaa = abcd;\n"
859                                  "$bbb = $aaa;\n"
860                                  "$bbb;\n";
861     UnicodeString testString1  = "abcdabcd";
862                                // 01234567890
863     int32_t bounds1[] = {0, 4, 8};
864     UErrorCode status=U_ZERO_ERROR;
865     UParseError    parseError;
866
867     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
868     if(U_FAILURE(status)) {
869         dataerrln("Fail : in construction - %s", u_errorName(status));
870     } else {
871         bi->setText(testString1);
872         doBoundaryTest(*bi, testString1, bounds1);
873     }
874     delete bi;
875}
876
877
878void RBBIAPITest::TestRegistration() {
879#if !UCONFIG_NO_SERVICE
880    UErrorCode status = U_ZERO_ERROR;
881    BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
882    // ok to not delete these if we exit because of error?
883    BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
884    BreakIterator* root_word = BreakIterator::createWordInstance("", status);
885    BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
886
887    if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
888        dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
889
890        delete ja_word;
891        delete ja_char;
892        delete root_word;
893        delete root_char;
894
895        return;
896    }
897
898    URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
899    {
900#if 0 // With a dictionary based word breaking, ja_word is identical to root.
901        if (ja_word && *ja_word == *root_word) {
902            errln("japan not different from root");
903        }
904#endif
905    }
906
907    {
908        BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
909        UBool fail = TRUE;
910        if(result){
911            fail = *result != *ja_word;
912        }
913        delete result;
914        if (fail) {
915            errln("bad result for xx_XX/word");
916        }
917    }
918
919    {
920        BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
921        UBool fail = TRUE;
922        if(result){
923            fail = *result != *ja_char;
924        }
925        delete result;
926        if (fail) {
927            errln("bad result for ja_JP/char");
928        }
929    }
930
931    {
932        BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
933        UBool fail = TRUE;
934        if(result){
935            fail = *result != *root_char;
936        }
937        delete result;
938        if (fail) {
939            errln("bad result for xx_XX/char");
940        }
941    }
942
943    {
944        StringEnumeration* avail = BreakIterator::getAvailableLocales();
945        UBool found = FALSE;
946        const UnicodeString* p;
947        while ((p = avail->snext(status))) {
948            if (p->compare("xx") == 0) {
949                found = TRUE;
950                break;
951            }
952        }
953        delete avail;
954        if (!found) {
955            errln("did not find test locale");
956        }
957    }
958
959    {
960        UBool unreg = BreakIterator::unregister(key, status);
961        if (!unreg) {
962            errln("unable to unregister");
963        }
964    }
965
966    {
967        BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
968        BreakIterator* root = BreakIterator::createWordInstance("", status);
969        UBool fail = TRUE;
970        if(root){
971          fail = *root != *result;
972        }
973        delete root;
974        delete result;
975        if (fail) {
976            errln("did not get root break");
977        }
978    }
979
980    {
981        StringEnumeration* avail = BreakIterator::getAvailableLocales();
982        UBool found = FALSE;
983        const UnicodeString* p;
984        while ((p = avail->snext(status))) {
985            if (p->compare("xx") == 0) {
986                found = TRUE;
987                break;
988            }
989        }
990        delete avail;
991        if (found) {
992            errln("found test locale");
993        }
994    }
995
996    {
997        int32_t count;
998        UBool   foundLocale = FALSE;
999        const Locale *avail = BreakIterator::getAvailableLocales(count);
1000        for (int i=0; i<count; i++) {
1001            if (avail[i] == Locale::getEnglish()) {
1002                foundLocale = TRUE;
1003                break;
1004            }
1005        }
1006        if (foundLocale == FALSE) {
1007            errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1008        }
1009    }
1010
1011
1012    // ja_word was adopted by factory
1013    delete ja_char;
1014    delete root_word;
1015    delete root_char;
1016#endif
1017}
1018
1019void RBBIAPITest::RoundtripRule(const char *dataFile) {
1020    UErrorCode status = U_ZERO_ERROR;
1021    UParseError parseError;
1022    parseError.line = 0;
1023    parseError.offset = 0;
1024    LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1025    uint32_t length;
1026    const UChar *builtSource;
1027    const uint8_t *rbbiRules;
1028    const uint8_t *builtRules;
1029
1030    if (U_FAILURE(status)) {
1031        errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1032        return;
1033    }
1034
1035    builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1036    builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1037    RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1038    if (U_FAILURE(status)) {
1039        errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1040                __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1041        errln(UnicodeString(builtSource));
1042        return;
1043    };
1044    rbbiRules = brkItr->getBinaryRules(length);
1045    logln("Comparing \"%s\" len=%d", dataFile, length);
1046    if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1047        errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1048        return;
1049    }
1050    delete brkItr;
1051}
1052
1053void RBBIAPITest::TestRoundtripRules() {
1054    RoundtripRule("word");
1055    RoundtripRule("title");
1056    RoundtripRule("sent");
1057    RoundtripRule("line");
1058    RoundtripRule("char");
1059    if (!quick) {
1060        RoundtripRule("word_POSIX");
1061    }
1062}
1063
1064
1065// Check getBinaryRules() and construction of a break iterator from those rules.
1066
1067void RBBIAPITest::TestGetBinaryRules() {
1068    UErrorCode status=U_ZERO_ERROR;
1069    LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1070    TEST_ASSERT_SUCCESS(status);
1071    RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1072    TEST_ASSERT(rbbi != NULL);
1073
1074    // Check that the new line break iterator is nominally functional.
1075    UnicodeString helloWorld("Hello, World!");
1076    rbbi->setText(helloWorld);
1077    int n = 0;
1078    while (bi->next() != UBRK_DONE) {
1079        ++n;
1080    }
1081    TEST_ASSERT(n == 2);
1082
1083    // Extract the binary rules as a uint8_t blob.
1084    uint32_t ruleLength;
1085    const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1086    TEST_ASSERT(ruleLength > 0);
1087    TEST_ASSERT(binRules != NULL);
1088
1089    // Clone the binary rules, and create a break iterator from that.
1090    // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1091    uint8_t *clonedRules = new uint8_t[ruleLength];
1092    memcpy(clonedRules, binRules, ruleLength);
1093    RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1094    TEST_ASSERT_SUCCESS(status);
1095
1096    // Check that the cloned line break iterator is nominally alive.
1097    clonedBI.setText(helloWorld);
1098    n = 0;
1099    while (clonedBI.next() != UBRK_DONE) {
1100        ++n;
1101    }
1102    TEST_ASSERT(n == 2);
1103
1104    delete[] clonedRules;
1105}
1106
1107
1108void RBBIAPITest::TestRefreshInputText() {
1109    /*
1110     *  RefreshInput changes out the input of a Break Iterator without
1111     *    changing anything else in the iterator's state.  Used with Java JNI,
1112     *    when Java moves the underlying string storage.   This test
1113     *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1114     *    The right set of boundaries should still be found.
1115     */
1116    UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
1117    UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
1118    UErrorCode status = U_ZERO_ERROR;
1119    UText ut1 = UTEXT_INITIALIZER;
1120    UText ut2 = UTEXT_INITIALIZER;
1121    RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1122    TEST_ASSERT_SUCCESS(status);
1123
1124    utext_openUChars(&ut1, testStr, -1, &status);
1125    TEST_ASSERT_SUCCESS(status);
1126
1127    if (U_SUCCESS(status)) {
1128        bi->setText(&ut1, status);
1129        TEST_ASSERT_SUCCESS(status);
1130
1131        /* Line boundaries will occur before each letter in the original string */
1132        TEST_ASSERT(1 == bi->next());
1133        TEST_ASSERT(3 == bi->next());
1134
1135        /* Move the string, kill the original string.  */
1136        u_strcpy(movedStr, testStr);
1137        u_memset(testStr, 0x20, u_strlen(testStr));
1138        utext_openUChars(&ut2, movedStr, -1, &status);
1139        TEST_ASSERT_SUCCESS(status);
1140        RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1141        TEST_ASSERT_SUCCESS(status);
1142        TEST_ASSERT(bi == returnedBI);
1143
1144        /* Find the following matches, now working in the moved string. */
1145        TEST_ASSERT(5 == bi->next());
1146        TEST_ASSERT(7 == bi->next());
1147        TEST_ASSERT(8 == bi->next());
1148        TEST_ASSERT(UBRK_DONE == bi->next());
1149
1150        utext_close(&ut1);
1151        utext_close(&ut2);
1152    }
1153    delete bi;
1154
1155}
1156
1157#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1158static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1159  static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1160  it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1161
1162  int32_t *pos = new int32_t[ustr.length()];
1163  int32_t posCount = 0;
1164
1165  // calculate breaks up front, so we can print out
1166  // sans any debugging
1167  for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1168    pos[posCount++] = n;
1169    if(posCount>=ustr.length()) {
1170      it.errln("brk count exceeds string length!");
1171      return;
1172    }
1173  }
1174  UnicodeString out;
1175  out.append((UChar)CHSTR);
1176  int32_t prev = 0;
1177  for(int32_t i=0;i<posCount;i++) {
1178    int32_t n=pos[i];
1179    out.append(ustr.tempSubString(prev,n-prev));
1180    out.append((UChar)PILCROW);
1181    prev=n;
1182  }
1183  out.append(ustr.tempSubString(prev,ustr.length()-prev));
1184  out.append((UChar)CHEND);
1185  it.logln(out);
1186
1187  out.remove();
1188  for(int32_t i=0;i<posCount;i++) {
1189    char tmp[100];
1190    sprintf(tmp,"%d ",pos[i]);
1191    out.append(UnicodeString(tmp));
1192  }
1193  it.logln(out);
1194  delete [] pos;
1195}
1196#endif
1197
1198void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1199#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1200  UErrorCode status = U_ZERO_ERROR;
1201  LocalPointer<FilteredBreakIteratorBuilder> builder;
1202  LocalPointer<BreakIterator> baseBI;
1203  LocalPointer<BreakIterator> filteredBI;
1204  LocalPointer<BreakIterator> frenchBI;
1205
1206  const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1207  const UnicodeString ABBR_MR("Mr.");
1208  const UnicodeString ABBR_CAPT("Capt.");
1209
1210  {
1211    logln("Constructing empty builder\n");
1212    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1213    TEST_ASSERT_SUCCESS(status);
1214
1215    logln("Constructing base BI\n");
1216    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1217    TEST_ASSERT_SUCCESS(status);
1218
1219	logln("Building new BI\n");
1220    filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1221    TEST_ASSERT_SUCCESS(status);
1222
1223	if (U_SUCCESS(status)) {
1224        logln("Testing:");
1225        filteredBI->setText(text);
1226        TEST_ASSERT(20 == filteredBI->next()); // Mr.
1227        TEST_ASSERT(84 == filteredBI->next()); // recovered.
1228        TEST_ASSERT(90 == filteredBI->next()); // Capt.
1229        TEST_ASSERT(181 == filteredBI->next()); // Mr.
1230        TEST_ASSERT(278 == filteredBI->next()); // charge.
1231        filteredBI->first();
1232        prtbrks(filteredBI.getAlias(), text, *this);
1233    }
1234  }
1235
1236  {
1237    logln("Constructing empty builder\n");
1238    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1239    TEST_ASSERT_SUCCESS(status);
1240
1241    if (U_SUCCESS(status)) {
1242        logln("Adding Mr. as an exception\n");
1243        TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1244        TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1245        TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1246        TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1247        TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1248        TEST_ASSERT_SUCCESS(status);
1249
1250        logln("Constructing base BI\n");
1251        baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1252        TEST_ASSERT_SUCCESS(status);
1253
1254        logln("Building new BI\n");
1255        filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1256        TEST_ASSERT_SUCCESS(status);
1257
1258        logln("Testing:");
1259        filteredBI->setText(text);
1260        TEST_ASSERT(84 == filteredBI->next());
1261        TEST_ASSERT(90 == filteredBI->next());// Capt.
1262        TEST_ASSERT(278 == filteredBI->next());
1263        filteredBI->first();
1264        prtbrks(filteredBI.getAlias(), text, *this);
1265    }
1266  }
1267
1268
1269  {
1270    logln("Constructing empty builder\n");
1271    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1272    TEST_ASSERT_SUCCESS(status);
1273
1274    if (U_SUCCESS(status)) {
1275        logln("Adding Mr. and Capt as an exception\n");
1276        TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1277        TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1278        TEST_ASSERT_SUCCESS(status);
1279
1280        logln("Constructing base BI\n");
1281        baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1282        TEST_ASSERT_SUCCESS(status);
1283
1284        logln("Building new BI\n");
1285        filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1286        TEST_ASSERT_SUCCESS(status);
1287
1288        logln("Testing:");
1289        filteredBI->setText(text);
1290        TEST_ASSERT(84 == filteredBI->next());
1291        TEST_ASSERT(278 == filteredBI->next());
1292        filteredBI->first();
1293        prtbrks(filteredBI.getAlias(), text, *this);
1294    }
1295  }
1296
1297
1298  {
1299    logln("Constructing English builder\n");
1300    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1301    TEST_ASSERT_SUCCESS(status);
1302
1303    logln("Constructing base BI\n");
1304    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1305    TEST_ASSERT_SUCCESS(status);
1306
1307    if (U_SUCCESS(status)) {
1308        logln("unsuppressing 'Capt'");
1309        TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1310
1311        logln("Building new BI\n");
1312        filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1313        TEST_ASSERT_SUCCESS(status);
1314
1315        if(filteredBI.isValid()) {
1316          logln("Testing:");
1317          filteredBI->setText(text);
1318          TEST_ASSERT(84 == filteredBI->next());
1319          TEST_ASSERT(90 == filteredBI->next());
1320          TEST_ASSERT(278 == filteredBI->next());
1321          filteredBI->first();
1322          prtbrks(filteredBI.getAlias(), text, *this);
1323        }
1324    }
1325  }
1326
1327
1328  {
1329    logln("Constructing English builder\n");
1330    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1331    TEST_ASSERT_SUCCESS(status);
1332
1333    logln("Constructing base BI\n");
1334    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1335    TEST_ASSERT_SUCCESS(status);
1336
1337    if (U_SUCCESS(status)) {
1338        logln("Building new BI\n");
1339        filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1340        TEST_ASSERT_SUCCESS(status);
1341
1342        if(filteredBI.isValid()) {
1343          logln("Testing:");
1344          filteredBI->setText(text);
1345          TEST_ASSERT(84 == filteredBI->next());
1346          TEST_ASSERT(278 == filteredBI->next());
1347          filteredBI->first();
1348          prtbrks(filteredBI.getAlias(), text, *this);
1349        }
1350    }
1351  }
1352
1353  // reenable once french is in
1354  {
1355    logln("Constructing French builder");
1356    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1357    TEST_ASSERT_SUCCESS(status);
1358
1359    logln("Constructing base BI\n");
1360    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1361    TEST_ASSERT_SUCCESS(status);
1362
1363    if (U_SUCCESS(status)) {
1364        logln("Building new BI\n");
1365        frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1366        TEST_ASSERT_SUCCESS(status);
1367    }
1368
1369    if(frenchBI.isValid()) {
1370      logln("Testing:");
1371      UnicodeString frText("C'est MM. Duval.");
1372      frenchBI->setText(frText);
1373      TEST_ASSERT(16 == frenchBI->next());
1374      TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1375      frenchBI->first();
1376      prtbrks(frenchBI.getAlias(), frText, *this);
1377      logln("Testing against English:");
1378      filteredBI->setText(frText);
1379      TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1380      TEST_ASSERT(16 == filteredBI->next());
1381      TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1382      filteredBI->first();
1383      prtbrks(filteredBI.getAlias(), frText, *this);
1384
1385      // Verify ==
1386      TEST_ASSERT_TRUE(*frenchBI   == *frenchBI);
1387      TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
1388      TEST_ASSERT_TRUE(*frenchBI   != *filteredBI);
1389    } else {
1390      dataerrln("French BI: not valid.");
1391	}
1392  }
1393
1394#else
1395  logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1396#endif
1397}
1398
1399//---------------------------------------------
1400// runIndexedTest
1401//---------------------------------------------
1402
// Test dispatcher for this suite.  Each TESTCASE_AUTO(...) line names one test
// method of RBBIAPITest; the TESTCASE_AUTO* macros are defined outside this
// file (presumably in intltest.h -- confirm there how index/exec/name are used).
//
// Tests grouped under !UCONFIG_NO_FILE_IO are only listed when file I/O is
// configured in.
void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
{
    if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
    TESTCASE_AUTO_BEGIN;
#if !UCONFIG_NO_FILE_IO
    TESTCASE_AUTO(TestCloneEquals);
    TESTCASE_AUTO(TestgetRules);
    TESTCASE_AUTO(TestHashCode);
    TESTCASE_AUTO(TestGetSetAdoptText);
    TESTCASE_AUTO(TestIteration);
#endif
    TESTCASE_AUTO(TestBuilder);
    TESTCASE_AUTO(TestQuoteGrouping);
    TESTCASE_AUTO(TestRuleStatusVec);
    TESTCASE_AUTO(TestBug2190);
#if !UCONFIG_NO_FILE_IO
    TESTCASE_AUTO(TestRegistration);
    TESTCASE_AUTO(TestBoilerPlate);
    TESTCASE_AUTO(TestRuleStatus);
    TESTCASE_AUTO(TestRoundtripRules);
    TESTCASE_AUTO(TestGetBinaryRules);
#endif
    TESTCASE_AUTO(TestRefreshInputText);
    // NOTE(review): this guard omits !UCONFIG_NO_FILTERED_BREAK_ITERATION, which the
    // test body does check; with filtered break iteration configured out the test
    // is still listed and only logs a "Skipped" message.
#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
    TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
#endif
    TESTCASE_AUTO_END;
}
1431
1432
1433//---------------------------------------------
1434//Internal subroutines
1435//---------------------------------------------
1436
1437void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1438     logln((UnicodeString)"testIsBoundary():");
1439        int32_t p = 0;
1440        UBool isB;
1441        for (int32_t i = 0; i < text.length(); i++) {
1442            isB = bi.isBoundary(i);
1443            logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1444
1445            if (i == boundaries[p]) {
1446                if (!isB)
1447                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1448                p++;
1449            }
1450            else {
1451                if (isB)
1452                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1453            }
1454        }
1455}
1456void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1457    UnicodeString selected;
1458    UnicodeString expected=CharsToUnicodeString(expectedString);
1459
1460    if(gotoffset != expectedOffset)
1461         errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1462    if(start <= gotoffset){
1463        testString.extractBetween(start, gotoffset, selected);
1464    }
1465    else{
1466        testString.extractBetween(gotoffset, start, selected);
1467    }
1468    if(selected.compare(expected) != 0)
1469         errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1470    else
1471        logln(prettify("****selected \"" + selected + "\""));
1472}
1473
1474#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1475