1/********************************************************************
2 * Copyright (c) 1999-2013, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 *   Date        Name        Description
6 *   12/14/99    Madhu        Creation.
7 *   01/12/2000  Madhu        updated for changed API
8 ********************************************************************/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/uchar.h"
15#include "intltest.h"
16#include "unicode/rbbi.h"
17#include "unicode/schriter.h"
18#include "rbbiapts.h"
19#include "rbbidata.h"
20#include "cstring.h"
21#include "ubrkimpl.h"
22#include "unicode/locid.h"
23#include "unicode/ustring.h"
24#include "unicode/utext.h"
25#include "cmemory.h"
26
27/**
28 * API Test the RuleBasedBreakIterator class
29 */
30
31
// Reports a data error, tagged with the current file and line, when `status`
// holds a failure code. Does nothing on success.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// Reports a test error, tagged with the current file and line and the text of
// the expression, when `expr` evaluates to FALSE.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
    } \
}
37
38void RBBIAPITest::TestCloneEquals()
39{
40
41    UErrorCode status=U_ZERO_ERROR;
42    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
43    RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
44    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
45    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
46    if(U_FAILURE(status)){
47        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
48        return;
49    }
50
51
52    UnicodeString testString="Testing word break iterators's clone() and equals()";
53    bi1->setText(testString);
54    bi2->setText(testString);
55    biequal->setText(testString);
56
57    bi3->setText("hello");
58
59    logln((UnicodeString)"Testing equals()");
60
61    logln((UnicodeString)"Testing == and !=");
62    UBool b = (*bi1 != *biequal);
63    b |= *bi1 == *bi2;
64    b |= *bi1 == *bi3;
65    if (b) {
66        errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
67    }
68
69    if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
70        errln((UnicodeString)"ERROR:2 RBBI's == and != operator  failed.");
71
72
73    // Quick test of RulesBasedBreakIterator assignment -
74    // Check that
75    //    two different iterators are !=
76    //    they are == after assignment
77    //    source and dest iterator produce the same next() after assignment.
78    //    deleting one doesn't disable the other.
79    logln("Testing assignment");
80    RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
81    if(U_FAILURE(status)){
82        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
83        return;
84    }
85
86    RuleBasedBreakIterator biDefault, biDefault2;
87    if(U_FAILURE(status)){
88        errln((UnicodeString)"FAIL : in construction of default iterator");
89        return;
90    }
91    if (biDefault == *bix) {
92        errln((UnicodeString)"ERROR: iterators should not compare ==");
93        return;
94    }
95    if (biDefault != biDefault2) {
96        errln((UnicodeString)"ERROR: iterators should compare ==");
97        return;
98    }
99
100
101    UnicodeString   HelloString("Hello Kitty");
102    bix->setText(HelloString);
103    if (*bix == *bi2) {
104        errln(UnicodeString("ERROR: strings should not be equal before assignment."));
105    }
106    *bix = *bi2;
107    if (*bix != *bi2) {
108        errln(UnicodeString("ERROR: strings should be equal before assignment."));
109    }
110
111    int bixnext = bix->next();
112    int bi2next = bi2->next();
113    if (! (bixnext == bi2next && bixnext == 7)) {
114        errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
115    }
116    delete bix;
117    if (bi2->next() != 8) {
118        errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
119    }
120
121
122
123    logln((UnicodeString)"Testing clone()");
124    RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
125    RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
126
127    if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
128      *bi1clone == *bi3 || *bi1clone == *bi2)
129        errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
130
131    if(*bi2clone == *bi1 || *bi2clone == *biequal ||
132       *bi2clone == *bi3 || *bi2clone != *bi2)
133        errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
134
135    if(bi1->getText() != bi1clone->getText()   ||
136       bi2clone->getText() != bi2->getText()   ||
137       *bi2clone == *bi1clone )
138        errln((UnicodeString)"ERROR: RBBI's clone() method failed");
139
140    delete bi1clone;
141    delete bi2clone;
142    delete bi1;
143    delete bi3;
144    delete bi2;
145    delete biequal;
146}
147
148void RBBIAPITest::TestBoilerPlate()
149{
150    UErrorCode status = U_ZERO_ERROR;
151    BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
152    BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
153    if (U_FAILURE(status)) {
154        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
155        return;
156    }
157    if(*a!=*b){
158        errln("Failed: boilerplate method operator!= does not return correct results");
159    }
160    // Japanese word break iterators are identical to root with
161    // a dictionary-based break iterator
162    BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
163    BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
164    if(c && d){
165        if(*c!=*d){
166            errln("Failed: boilerplate method operator== does not return correct results");
167        }
168    }else{
169        errln("creation of break iterator failed");
170    }
171    delete a;
172    delete b;
173    delete c;
174    delete d;
175}
176
177void RBBIAPITest::TestgetRules()
178{
179    UErrorCode status=U_ZERO_ERROR;
180
181    RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
182    RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
183    if(U_FAILURE(status)){
184        errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
185        delete bi1;
186        delete bi2;
187        return;
188    }
189
190
191
192    logln((UnicodeString)"Testing toString()");
193
194    bi1->setText((UnicodeString)"Hello there");
195
196    RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
197
198    UnicodeString temp=bi1->getRules();
199    UnicodeString temp2=bi2->getRules();
200    UnicodeString temp3=bi3->getRules();
201    if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
202        errln((UnicodeString)"ERROR: error in getRules() method");
203
204    delete bi1;
205    delete bi2;
206    delete bi3;
207}
208void RBBIAPITest::TestHashCode()
209{
210    UErrorCode status=U_ZERO_ERROR;
211    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
212    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
213    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
214    if(U_FAILURE(status)){
215        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
216        delete bi1;
217        delete bi2;
218        delete bi3;
219        return;
220    }
221
222
223    logln((UnicodeString)"Testing hashCode()");
224
225    bi1->setText((UnicodeString)"Hash code");
226    bi2->setText((UnicodeString)"Hash code");
227    bi3->setText((UnicodeString)"Hash code");
228
229    RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
230    RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
231
232    if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
233        bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
234        errln((UnicodeString)"ERROR: identical objects have different hashcodes");
235
236    if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
237        bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
238        errln((UnicodeString)"ERROR: different objects have same hashcodes");
239
240    delete bi1clone;
241    delete bi2clone;
242    delete bi1;
243    delete bi2;
244    delete bi3;
245
246}
247void RBBIAPITest::TestGetSetAdoptText()
248{
249    logln((UnicodeString)"Testing getText setText ");
250    IcuTestErrorCode status(*this, "TestGetSetAdoptText");
251    UnicodeString str1="first string.";
252    UnicodeString str2="Second string.";
253    LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
254    LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
255    if(status.isFailure()){
256        errcheckln(status, "Fail : in construction - %s", status.errorName());
257            return;
258    }
259
260
261    CharacterIterator* text1= new StringCharacterIterator(str1);
262    CharacterIterator* text1Clone = text1->clone();
263    CharacterIterator* text2= new StringCharacterIterator(str2);
264    CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
265
266    wordIter1->setText(str1);
267    CharacterIterator *tci = &wordIter1->getText();
268    UnicodeString      tstr;
269    tci->getText(tstr);
270    TEST_ASSERT(tstr == str1);
271    if(wordIter1->current() != 0)
272        errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
273
274    wordIter1->next(2);
275
276    wordIter1->setText(str2);
277    if(wordIter1->current() != 0)
278        errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
279
280
281    charIter1->adoptText(text1Clone);
282    TEST_ASSERT(wordIter1->getText() != charIter1->getText());
283    tci = &wordIter1->getText();
284    tci->getText(tstr);
285    TEST_ASSERT(tstr == str2);
286    tci = &charIter1->getText();
287    tci->getText(tstr);
288    TEST_ASSERT(tstr == str1);
289
290
291    LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
292    rb->adoptText(text1);
293    if(rb->getText() != *text1)
294        errln((UnicodeString)"ERROR:1 error in adoptText ");
295    rb->adoptText(text2);
296    if(rb->getText() != *text2)
297        errln((UnicodeString)"ERROR:2 error in adoptText ");
298
299    // Adopt where iterator range is less than the entire orignal source string.
300    //   (With the change of the break engine to working with UText internally,
301    //    CharacterIterators starting at positions other than zero are not supported)
302    rb->adoptText(text3);
303    TEST_ASSERT(rb->preceding(2) == 0);
304    TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
305    //if(rb->preceding(2) != 3) {
306    //    errln((UnicodeString)"ERROR:3 error in adoptText ");
307    //}
308    //if(rb->following(11) != BreakIterator::DONE) {
309    //    errln((UnicodeString)"ERROR:4 error in adoptText ");
310    //}
311
312    // UText API
313    //
314    //   Quick test to see if UText is working at all.
315    //
316    const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
317    const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
318    //                012345678901
319
320    status.reset();
321    LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
322    wordIter1->setText(ut.getAlias(), status);
323    TEST_ASSERT_SUCCESS(status);
324
325    int32_t pos;
326    pos = wordIter1->first();
327    TEST_ASSERT(pos==0);
328    pos = wordIter1->next();
329    TEST_ASSERT(pos==5);
330    pos = wordIter1->next();
331    TEST_ASSERT(pos==6);
332    pos = wordIter1->next();
333    TEST_ASSERT(pos==11);
334    pos = wordIter1->next();
335    TEST_ASSERT(pos==UBRK_DONE);
336
337    status.reset();
338    LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
339    TEST_ASSERT_SUCCESS(status);
340    wordIter1->setText(ut2.getAlias(), status);
341    TEST_ASSERT_SUCCESS(status);
342
343    pos = wordIter1->first();
344    TEST_ASSERT(pos==0);
345    pos = wordIter1->next();
346    TEST_ASSERT(pos==3);
347    pos = wordIter1->next();
348    TEST_ASSERT(pos==4);
349
350    pos = wordIter1->last();
351    TEST_ASSERT(pos==6);
352    pos = wordIter1->previous();
353    TEST_ASSERT(pos==4);
354    pos = wordIter1->previous();
355    TEST_ASSERT(pos==3);
356    pos = wordIter1->previous();
357    TEST_ASSERT(pos==0);
358    pos = wordIter1->previous();
359    TEST_ASSERT(pos==UBRK_DONE);
360
361    status.reset();
362    UnicodeString sEmpty;
363    LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
364    wordIter1->getUText(gut2.getAlias(), status);
365    TEST_ASSERT_SUCCESS(status);
366    status.reset();
367}
368
369
370void RBBIAPITest::TestIteration()
371{
372    // This test just verifies that the API is present.
373    // Testing for correct operation of the break rules happens elsewhere.
374
375    UErrorCode status=U_ZERO_ERROR;
376    RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
377    if (U_FAILURE(status) || bi == NULL)  {
378        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
379    }
380    delete bi;
381
382    status=U_ZERO_ERROR;
383    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
384    if (U_FAILURE(status) || bi == NULL)  {
385        errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
386    }
387    delete bi;
388
389    status=U_ZERO_ERROR;
390    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
391    if (U_FAILURE(status) || bi == NULL)  {
392        errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
393    }
394    delete bi;
395
396    status=U_ZERO_ERROR;
397    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
398    if (U_FAILURE(status) || bi == NULL)  {
399        errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
400    }
401    delete bi;
402
403    status=U_ZERO_ERROR;
404    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
405    if (U_FAILURE(status) || bi == NULL)  {
406        errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
407    }
408    delete bi;
409
410    status=U_ZERO_ERROR;
411    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
412    if (U_FAILURE(status) || bi == NULL)  {
413        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
414        return;   // Skip the rest of these tests.
415    }
416
417
418    UnicodeString testString="0123456789";
419    bi->setText(testString);
420
421    int32_t i;
422    i = bi->first();
423    if (i != 0) {
424        errln("Incorrect value from bi->first().  Expected 0, got %d.", i);
425    }
426
427    i = bi->last();
428    if (i != 10) {
429        errln("Incorrect value from bi->last().  Expected 10, got %d", i);
430    }
431
432    //
433    // Previous
434    //
435    bi->last();
436    i = bi->previous();
437    if (i != 9) {
438        errln("Incorrect value from bi->last() at line %d.  Expected 9, got %d", __LINE__, i);
439    }
440
441
442    bi->first();
443    i = bi->previous();
444    if (i != BreakIterator::DONE) {
445        errln("Incorrect value from bi->previous() at line %d.  Expected DONE, got %d", __LINE__, i);
446    }
447
448    //
449    // next()
450    //
451    bi->first();
452    i = bi->next();
453    if (i != 1) {
454        errln("Incorrect value from bi->next() at line %d.  Expected 1, got %d", __LINE__, i);
455    }
456
457    bi->last();
458    i = bi->next();
459    if (i != BreakIterator::DONE) {
460        errln("Incorrect value from bi->next() at line %d.  Expected DONE, got %d", __LINE__, i);
461    }
462
463
464    //
465    //  current()
466    //
467    bi->first();
468    i = bi->current();
469    if (i != 0) {
470        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
471    }
472
473    bi->next();
474    i = bi->current();
475    if (i != 1) {
476        errln("Incorrect value from bi->previous() at line %d.  Expected 1, got %d", __LINE__, i);
477    }
478
479    bi->last();
480    bi->next();
481    i = bi->current();
482    if (i != 10) {
483        errln("Incorrect value from bi->previous() at line %d.  Expected 10, got %d", __LINE__, i);
484    }
485
486    bi->first();
487    bi->previous();
488    i = bi->current();
489    if (i != 0) {
490        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
491    }
492
493
494    //
495    // Following()
496    //
497    i = bi->following(4);
498    if (i != 5) {
499        errln("Incorrect value from bi->following() at line %d.  Expected 5, got %d", __LINE__, i);
500    }
501
502    i = bi->following(9);
503    if (i != 10) {
504        errln("Incorrect value from bi->following() at line %d.  Expected 10, got %d", __LINE__, i);
505    }
506
507    i = bi->following(10);
508    if (i != BreakIterator::DONE) {
509        errln("Incorrect value from bi->following() at line %d.  Expected DONE, got %d", __LINE__, i);
510    }
511
512
513    //
514    // Preceding
515    //
516    i = bi->preceding(4);
517    if (i != 3) {
518        errln("Incorrect value from bi->preceding() at line %d.  Expected 3, got %d", __LINE__, i);
519    }
520
521    i = bi->preceding(10);
522    if (i != 9) {
523        errln("Incorrect value from bi->preceding() at line %d.  Expected 9, got %d", __LINE__, i);
524    }
525
526    i = bi->preceding(1);
527    if (i != 0) {
528        errln("Incorrect value from bi->preceding() at line %d.  Expected 0, got %d", __LINE__, i);
529    }
530
531    i = bi->preceding(0);
532    if (i != BreakIterator::DONE) {
533        errln("Incorrect value from bi->preceding() at line %d.  Expected DONE, got %d", __LINE__, i);
534    }
535
536
537    //
538    // isBoundary()
539    //
540    bi->first();
541    if (bi->isBoundary(3) != TRUE) {
542        errln("Incorrect value from bi->isBoudary() at line %d.  Expected TRUE, got FALSE", __LINE__, i);
543    }
544    i = bi->current();
545    if (i != 3) {
546        errln("Incorrect value from bi->current() at line %d.  Expected 3, got %d", __LINE__, i);
547    }
548
549
550    if (bi->isBoundary(11) != FALSE) {
551        errln("Incorrect value from bi->isBoudary() at line %d.  Expected FALSE, got TRUE", __LINE__, i);
552    }
553    i = bi->current();
554    if (i != 10) {
555        errln("Incorrect value from bi->current() at line %d.  Expected 10, got %d", __LINE__, i);
556    }
557
558    //
559    // next(n)
560    //
561    bi->first();
562    i = bi->next(4);
563    if (i != 4) {
564        errln("Incorrect value from bi->next() at line %d.  Expected 4, got %d", __LINE__, i);
565    }
566
567    i = bi->next(6);
568    if (i != 10) {
569        errln("Incorrect value from bi->next() at line %d.  Expected 10, got %d", __LINE__, i);
570    }
571
572    bi->first();
573    i = bi->next(11);
574    if (i != BreakIterator::DONE) {
575        errln("Incorrect value from bi->next() at line %d.  Expected BreakIterator::DONE, got %d", __LINE__, i);
576    }
577
578    delete bi;
579
580}
581
582
583
584
585
586
587void RBBIAPITest::TestBuilder() {
588     UnicodeString rulesString1 = "$Letters = [:L:];\n"
589                                  "$Numbers = [:N:];\n"
590                                  "$Letters+;\n"
591                                  "$Numbers+;\n"
592                                  "[^$Letters $Numbers];\n"
593                                  "!.*;\n";
594     UnicodeString testString1  = "abc123..abc";
595                                // 01234567890
596     int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
597     UErrorCode status=U_ZERO_ERROR;
598     UParseError    parseError;
599
600     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
601     if(U_FAILURE(status)) {
602         dataerrln("Fail : in construction - %s", u_errorName(status));
603     } else {
604         bi->setText(testString1);
605         doBoundaryTest(*bi, testString1, bounds1);
606     }
607     delete bi;
608}
609
610
611//
612//  TestQuoteGrouping
613//       Single quotes within rules imply a grouping, so that a modifier
614//       following the quoted text (* or +) applies to all of the quoted chars.
615//
616void RBBIAPITest::TestQuoteGrouping() {
617     UnicodeString rulesString1 = "#Here comes the rule...\n"
618                                  "'$@!'*;\n"   //  (\$\@\!)*
619                                  ".;\n";
620
621     UnicodeString testString1  = "$@!$@!X$@!!X";
622                                // 0123456789012
623     int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
624     UErrorCode status=U_ZERO_ERROR;
625     UParseError    parseError;
626
627     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
628     if(U_FAILURE(status)) {
629         dataerrln("Fail : in construction - %s", u_errorName(status));
630     } else {
631         bi->setText(testString1);
632         doBoundaryTest(*bi, testString1, bounds1);
633     }
634     delete bi;
635}
636
637//
638//  TestRuleStatus
639//      Test word break rule status constants.
640//
641void RBBIAPITest::TestRuleStatus() {
642     UChar str[30];
643     //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
644     // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
645     u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
646              // 012345678901234567  8      9    0
647              //                     Katakana
648                str, 30);
649     UnicodeString testString1(str);
650     int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
651     int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
652                          UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
653                          UBRK_WORD_IDEO,     UBRK_WORD_NONE};
654
655     int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
656                          UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
657                          UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
658
659     UErrorCode status=U_ZERO_ERROR;
660
661     BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
662     if(U_FAILURE(status)) {
663         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
664     } else {
665         bi->setText(testString1);
666         // First test that the breaks are in the right spots.
667         doBoundaryTest(*bi, testString1, bounds1);
668
669         // Then go back and check tag values
670         int32_t i = 0;
671         int32_t pos, tag;
672         for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
673             if (pos != bounds1[i]) {
674                 errln("FAIL: unexpected word break at postion %d", pos);
675                 break;
676             }
677             tag = bi->getRuleStatus();
678             if (tag < tag_lo[i] || tag >= tag_hi[i]) {
679                 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
680                 break;
681             }
682
683             // Check that we get the same tag values from getRuleStatusVec()
684             int32_t vec[10];
685             int t = bi->getRuleStatusVec(vec, 10, status);
686             TEST_ASSERT_SUCCESS(status);
687             TEST_ASSERT(t==1);
688             TEST_ASSERT(vec[0] == tag);
689         }
690     }
691     delete bi;
692
693     // Now test line break status.  This test mostly is to confirm that the status constants
694     //                              are correctly declared in the header.
695     testString1 =   "test line. \n";
696     // break type    s    s     h
697
698     bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
699     if(U_FAILURE(status)) {
700         errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
701     } else {
702         int32_t i = 0;
703         int32_t pos, tag;
704         UBool   success;
705
706         bi->setText(testString1);
707         pos = bi->current();
708         tag = bi->getRuleStatus();
709         for (i=0; i<3; i++) {
710             switch (i) {
711             case 0:
712                 success = pos==0  && tag==UBRK_LINE_SOFT; break;
713             case 1:
714                 success = pos==5  && tag==UBRK_LINE_SOFT; break;
715             case 2:
716                 success = pos==12 && tag==UBRK_LINE_HARD; break;
717             default:
718                 success = FALSE; break;
719             }
720             if (success == FALSE) {
721                 errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
722                     i, pos, tag);
723                 break;
724             }
725             pos = bi->next();
726             tag = bi->getRuleStatus();
727         }
728         if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
729             UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
730             (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
731             errln("UBRK_LINE_* constants from header are inconsistent.");
732         }
733     }
734     delete bi;
735
736}
737
738
739//
740//  TestRuleStatusVec
741//      Test the vector form of  break rule status.
742//
743void RBBIAPITest::TestRuleStatusVec() {
744    UnicodeString rulesString(   "[A-N]{100}; \n"
745                                 "[a-w]{200}; \n"
746                                 "[\\p{L}]{300}; \n"
747                                 "[\\p{N}]{400}; \n"
748                                 "[0-5]{500}; \n"
749                                  "!.*;\n", -1, US_INV);
750     UnicodeString testString1  = "Aapz5?";
751     int32_t  statusVals[10];
752     int32_t  numStatuses;
753     int32_t  pos;
754
755     UErrorCode status=U_ZERO_ERROR;
756     UParseError    parseError;
757
758     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
759     if (U_FAILURE(status)) {
760         dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
761     } else {
762         bi->setText(testString1);
763
764         // A
765         pos = bi->next();
766         TEST_ASSERT(pos==1);
767         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
768         TEST_ASSERT_SUCCESS(status);
769         TEST_ASSERT(numStatuses == 2);
770         TEST_ASSERT(statusVals[0] == 100);
771         TEST_ASSERT(statusVals[1] == 300);
772
773         // a
774         pos = bi->next();
775         TEST_ASSERT(pos==2);
776         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
777         TEST_ASSERT_SUCCESS(status);
778         TEST_ASSERT(numStatuses == 2);
779         TEST_ASSERT(statusVals[0] == 200);
780         TEST_ASSERT(statusVals[1] == 300);
781
782         // p
783         pos = bi->next();
784         TEST_ASSERT(pos==3);
785         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
786         TEST_ASSERT_SUCCESS(status);
787         TEST_ASSERT(numStatuses == 2);
788         TEST_ASSERT(statusVals[0] == 200);
789         TEST_ASSERT(statusVals[1] == 300);
790
791         // z
792         pos = bi->next();
793         TEST_ASSERT(pos==4);
794         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
795         TEST_ASSERT_SUCCESS(status);
796         TEST_ASSERT(numStatuses == 1);
797         TEST_ASSERT(statusVals[0] == 300);
798
799         // 5
800         pos = bi->next();
801         TEST_ASSERT(pos==5);
802         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
803         TEST_ASSERT_SUCCESS(status);
804         TEST_ASSERT(numStatuses == 2);
805         TEST_ASSERT(statusVals[0] == 400);
806         TEST_ASSERT(statusVals[1] == 500);
807
808         // ?
809         pos = bi->next();
810         TEST_ASSERT(pos==6);
811         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
812         TEST_ASSERT_SUCCESS(status);
813         TEST_ASSERT(numStatuses == 1);
814         TEST_ASSERT(statusVals[0] == 0);
815
816         //
817         //  Check buffer overflow error handling.   Char == A
818         //
819         bi->first();
820         pos = bi->next();
821         TEST_ASSERT(pos==1);
822         memset(statusVals, -1, sizeof(statusVals));
823         numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
824         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
825         TEST_ASSERT(numStatuses == 2);
826         TEST_ASSERT(statusVals[0] == -1);
827
828         status = U_ZERO_ERROR;
829         memset(statusVals, -1, sizeof(statusVals));
830         numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
831         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
832         TEST_ASSERT(numStatuses == 2);
833         TEST_ASSERT(statusVals[0] == 100);
834         TEST_ASSERT(statusVals[1] == -1);
835
836         status = U_ZERO_ERROR;
837         memset(statusVals, -1, sizeof(statusVals));
838         numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
839         TEST_ASSERT_SUCCESS(status);
840         TEST_ASSERT(numStatuses == 2);
841         TEST_ASSERT(statusVals[0] == 100);
842         TEST_ASSERT(statusVals[1] == 300);
843         TEST_ASSERT(statusVals[2] == -1);
844     }
845     delete bi;
846
847}
848
849//
850//   Bug 2190 Regression test.   Builder crash on rule consisting of only a
851//                               $variable reference
852void RBBIAPITest::TestBug2190() {
853     UnicodeString rulesString1 = "$aaa = abcd;\n"
854                                  "$bbb = $aaa;\n"
855                                  "$bbb;\n";
856     UnicodeString testString1  = "abcdabcd";
857                                // 01234567890
858     int32_t bounds1[] = {0, 4, 8};
859     UErrorCode status=U_ZERO_ERROR;
860     UParseError    parseError;
861
862     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
863     if(U_FAILURE(status)) {
864         dataerrln("Fail : in construction - %s", u_errorName(status));
865     } else {
866         bi->setText(testString1);
867         doBoundaryTest(*bi, testString1, bounds1);
868     }
869     delete bi;
870}
871
872
873void RBBIAPITest::TestRegistration() {
874#if !UCONFIG_NO_SERVICE
875    UErrorCode status = U_ZERO_ERROR;
876    BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
877    // ok to not delete these if we exit because of error?
878    BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
879    BreakIterator* root_word = BreakIterator::createWordInstance("", status);
880    BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
881
882    if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
883        dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
884
885        delete ja_word;
886        delete ja_char;
887        delete root_word;
888        delete root_char;
889
890        return;
891    }
892
893    URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
894    {
895#if 0 // With a dictionary based word breaking, ja_word is identical to root.
896        if (ja_word && *ja_word == *root_word) {
897            errln("japan not different from root");
898        }
899#endif
900    }
901
902    {
903        BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
904        UBool fail = TRUE;
905        if(result){
906            fail = *result != *ja_word;
907        }
908        delete result;
909        if (fail) {
910            errln("bad result for xx_XX/word");
911        }
912    }
913
914    {
915        BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
916        UBool fail = TRUE;
917        if(result){
918            fail = *result != *ja_char;
919        }
920        delete result;
921        if (fail) {
922            errln("bad result for ja_JP/char");
923        }
924    }
925
926    {
927        BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
928        UBool fail = TRUE;
929        if(result){
930            fail = *result != *root_char;
931        }
932        delete result;
933        if (fail) {
934            errln("bad result for xx_XX/char");
935        }
936    }
937
938    {
939        StringEnumeration* avail = BreakIterator::getAvailableLocales();
940        UBool found = FALSE;
941        const UnicodeString* p;
942        while ((p = avail->snext(status))) {
943            if (p->compare("xx") == 0) {
944                found = TRUE;
945                break;
946            }
947        }
948        delete avail;
949        if (!found) {
950            errln("did not find test locale");
951        }
952    }
953
954    {
955        UBool unreg = BreakIterator::unregister(key, status);
956        if (!unreg) {
957            errln("unable to unregister");
958        }
959    }
960
961    {
962        BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
963        BreakIterator* root = BreakIterator::createWordInstance("", status);
964        UBool fail = TRUE;
965        if(root){
966          fail = *root != *result;
967        }
968        delete root;
969        delete result;
970        if (fail) {
971            errln("did not get root break");
972        }
973    }
974
975    {
976        StringEnumeration* avail = BreakIterator::getAvailableLocales();
977        UBool found = FALSE;
978        const UnicodeString* p;
979        while ((p = avail->snext(status))) {
980            if (p->compare("xx") == 0) {
981                found = TRUE;
982                break;
983            }
984        }
985        delete avail;
986        if (found) {
987            errln("found test locale");
988        }
989    }
990
991    {
992        int32_t count;
993        UBool   foundLocale = FALSE;
994        const Locale *avail = BreakIterator::getAvailableLocales(count);
995        for (int i=0; i<count; i++) {
996            if (avail[i] == Locale::getEnglish()) {
997                foundLocale = TRUE;
998                break;
999            }
1000        }
1001        if (foundLocale == FALSE) {
1002            errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1003        }
1004    }
1005
1006
1007    // ja_word was adopted by factory
1008    delete ja_char;
1009    delete root_word;
1010    delete root_char;
1011#endif
1012}
1013
1014void RBBIAPITest::RoundtripRule(const char *dataFile) {
1015    UErrorCode status = U_ZERO_ERROR;
1016    UParseError parseError;
1017    parseError.line = 0;
1018    parseError.offset = 0;
1019    LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1020    uint32_t length;
1021    const UChar *builtSource;
1022    const uint8_t *rbbiRules;
1023    const uint8_t *builtRules;
1024
1025    if (U_FAILURE(status)) {
1026        errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status));
1027        return;
1028    }
1029
1030    builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1031    builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1032    RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1033    if (U_FAILURE(status)) {
1034        errln("createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1035                u_errorName(status), parseError.line, parseError.offset);
1036        return;
1037    };
1038    rbbiRules = brkItr->getBinaryRules(length);
1039    logln("Comparing \"%s\" len=%d", dataFile, length);
1040    if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1041        errln("Built rules and rebuilt rules are different %s", dataFile);
1042        return;
1043    }
1044    delete brkItr;
1045}
1046
1047void RBBIAPITest::TestRoundtripRules() {
1048    RoundtripRule("word");
1049    RoundtripRule("title");
1050    RoundtripRule("sent");
1051    RoundtripRule("line");
1052    RoundtripRule("char");
1053    if (!quick) {
1054        RoundtripRule("word_POSIX");
1055    }
1056}
1057
1058// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1059// (these are protected so we access them via a local class RBBIWithProtectedFunctions).
1060// This is just a sanity check, not a thorough test (e.g. we don't check that the
1061// first delete actually frees rulesCopy).
1062void RBBIAPITest::TestCreateFromRBBIData() {
1063    // Get some handy RBBIData
1064    const char *brkName = "word"; // or "sent", "line", "char", etc.
1065    UErrorCode status = U_ZERO_ERROR;
1066    LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status));
1067    if ( U_SUCCESS(status) ) {
1068        const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias());
1069        uint32_t length = builtRules->fLength;
1070        RBBIWithProtectedFunctions * brkItr;
1071
1072        // Try the memory-adopting constructor, need to copy the data first
1073        RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
1074        if ( rulesCopy ) {
1075            uprv_memcpy( rulesCopy, builtRules, length );
1076
1077            brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
1078            if ( U_SUCCESS(status) ) {
1079                delete brkItr; // this should free rulesCopy
1080            } else {
1081                errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
1082                status = U_ZERO_ERROR;// reset for the next test
1083                uprv_free( rulesCopy );
1084            }
1085        }
1086
1087        // Now try the non-adopting constructor
1088        brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
1089        if ( U_SUCCESS(status) ) {
1090            delete brkItr; // this should NOT attempt to free builtRules
1091            if (builtRules->fLength != length) { // sanity check
1092                errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
1093            }
1094        } else {
1095            errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
1096        }
1097    }
1098
1099    // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...)
1100    //
1101    status = U_ZERO_ERROR;
1102    RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
1103    if (rb == NULL || U_FAILURE(status)) {
1104        dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status));
1105    } else {
1106        uint32_t length;
1107        const uint8_t *rules = rb->getBinaryRules(length);
1108        RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status);
1109        TEST_ASSERT_SUCCESS(status);
1110        TEST_ASSERT(*rb == *rb2);
1111        UnicodeString words = "one two three ";
1112        rb2->setText(words);
1113        int wordCounter = 0;
1114        while (rb2->next() != UBRK_DONE) {
1115            wordCounter++;
1116        }
1117        TEST_ASSERT(wordCounter == 6);
1118
1119        status = U_ZERO_ERROR;
1120        RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status);
1121        TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1122
1123        delete rb;
1124        delete rb2;
1125        delete rb3;
1126    }
1127}
1128
1129
1130void RBBIAPITest::TestRefreshInputText() {
1131    /*
1132     *  RefreshInput changes out the input of a Break Iterator without
1133     *    changing anything else in the iterator's state.  Used with Java JNI,
1134     *    when Java moves the underlying string storage.   This test
1135     *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1136     *    The right set of boundaries should still be found.
1137     */
1138    UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
1139    UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
1140    UErrorCode status = U_ZERO_ERROR;
1141    UText ut1 = UTEXT_INITIALIZER;
1142    UText ut2 = UTEXT_INITIALIZER;
1143    RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1144    TEST_ASSERT_SUCCESS(status);
1145
1146    utext_openUChars(&ut1, testStr, -1, &status);
1147    TEST_ASSERT_SUCCESS(status);
1148
1149    if (U_SUCCESS(status)) {
1150        bi->setText(&ut1, status);
1151        TEST_ASSERT_SUCCESS(status);
1152
1153        /* Line boundaries will occur before each letter in the original string */
1154        TEST_ASSERT(1 == bi->next());
1155        TEST_ASSERT(3 == bi->next());
1156
1157        /* Move the string, kill the original string.  */
1158        u_strcpy(movedStr, testStr);
1159        u_memset(testStr, 0x20, u_strlen(testStr));
1160        utext_openUChars(&ut2, movedStr, -1, &status);
1161        TEST_ASSERT_SUCCESS(status);
1162        RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1163        TEST_ASSERT_SUCCESS(status);
1164        TEST_ASSERT(bi == returnedBI);
1165
1166        /* Find the following matches, now working in the moved string. */
1167        TEST_ASSERT(5 == bi->next());
1168        TEST_ASSERT(7 == bi->next());
1169        TEST_ASSERT(8 == bi->next());
1170        TEST_ASSERT(UBRK_DONE == bi->next());
1171
1172        utext_close(&ut1);
1173        utext_close(&ut2);
1174    }
1175    delete bi;
1176
1177}
1178
1179
1180//---------------------------------------------
1181// runIndexedTest
1182//---------------------------------------------
1183
1184void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1185{
1186    if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1187    switch (index) {
1188     //   case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1189#if !UCONFIG_NO_FILE_IO
1190        case  0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1191        case  1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1192        case  2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1193        case  3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1194        case  4: name = "TestIteration"; if (exec) TestIteration(); break;
1195#else
1196        case  0: case  1: case  2: case  3: case  4: name = "skip"; break;
1197#endif
1198        case  5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1199        case  6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
1200        case  7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1201        case  8: name = "TestBug2190"; if (exec) TestBug2190(); break;
1202#if !UCONFIG_NO_FILE_IO
1203        case  9: name = "TestRegistration"; if (exec) TestRegistration(); break;
1204        case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1205        case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1206        case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1207        case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
1208#else
1209        case  9: case 10: case 11: case 12: case 13: name = "skip"; break;
1210#endif
1211        case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break;
1212
1213        default: name = ""; break; // needed to end loop
1214    }
1215}
1216
1217//---------------------------------------------
1218//Internal subroutines
1219//---------------------------------------------
1220
1221void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1222     logln((UnicodeString)"testIsBoundary():");
1223        int32_t p = 0;
1224        UBool isB;
1225        for (int32_t i = 0; i < text.length(); i++) {
1226            isB = bi.isBoundary(i);
1227            logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1228
1229            if (i == boundaries[p]) {
1230                if (!isB)
1231                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1232                p++;
1233            }
1234            else {
1235                if (isB)
1236                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1237            }
1238        }
1239}
1240void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1241    UnicodeString selected;
1242    UnicodeString expected=CharsToUnicodeString(expectedString);
1243
1244    if(gotoffset != expectedOffset)
1245         errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1246    if(start <= gotoffset){
1247        testString.extractBetween(start, gotoffset, selected);
1248    }
1249    else{
1250        testString.extractBetween(gotoffset, start, selected);
1251    }
1252    if(selected.compare(expected) != 0)
1253         errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1254    else
1255        logln(prettify("****selected \"" + selected + "\""));
1256}
1257
1258//---------------------------------------------
1259//RBBIWithProtectedFunctions class functions
1260//---------------------------------------------
1261
1262RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
1263    : RuleBasedBreakIterator(data, status)
1264{
1265}
1266
1267RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
1268    : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
1269{
1270}
1271
1272#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1273