1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * Copyright (c) 1999-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 *   Date        Name        Description
8 *   12/14/99    Madhu        Creation.
9 *   01/12/2000  Madhu        updated for changed API
10 ********************************************************************/
11
12#include "unicode/utypes.h"
13
14#if !UCONFIG_NO_BREAK_ITERATION
15
16#include "unicode/uchar.h"
17#include "intltest.h"
18#include "unicode/rbbi.h"
19#include "unicode/schriter.h"
20#include "rbbiapts.h"
21#include "rbbidata.h"
22#include "cstring.h"
23#include "ubrkimpl.h"
24#include "unicode/locid.h"
25#include "unicode/ustring.h"
26#include "unicode/utext.h"
27#include "cmemory.h"
28#if !UCONFIG_NO_BREAK_ITERATION
29#include "unicode/filteredbrk.h"
30#include <stdio.h> // for sprintf
31#endif
/**
 * API tests for the RuleBasedBreakIterator class.
 */
35
36
// Calls dataerrln() with the current file, line and ICU error name when
// `status` holds a failure code. The enclosing test keeps running afterwards.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
    } \
}
39
// Calls errln() with the current file, line and the literal text of `expr`
// when `expr` compares equal to FALSE. The enclosing test keeps running afterwards.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
    } \
}
42
43void RBBIAPITest::TestCloneEquals()
44{
45
46    UErrorCode status=U_ZERO_ERROR;
47    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
48    RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
49    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
50    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
51    if(U_FAILURE(status)){
52        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
53        return;
54    }
55
56
57    UnicodeString testString="Testing word break iterators's clone() and equals()";
58    bi1->setText(testString);
59    bi2->setText(testString);
60    biequal->setText(testString);
61
62    bi3->setText("hello");
63
64    logln((UnicodeString)"Testing equals()");
65
66    logln((UnicodeString)"Testing == and !=");
67    UBool b = (*bi1 != *biequal);
68    b |= *bi1 == *bi2;
69    b |= *bi1 == *bi3;
70    if (b) {
71        errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__, __LINE__);
72    }
73
74    if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
75        errln("%s:%d ERROR:2 RBBI's == and != operator  failed.", __FILE__, __LINE__);
76
77
78    // Quick test of RulesBasedBreakIterator assignment -
79    // Check that
80    //    two different iterators are !=
81    //    they are == after assignment
82    //    source and dest iterator produce the same next() after assignment.
83    //    deleting one doesn't disable the other.
84    logln("Testing assignment");
85    RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
86    if(U_FAILURE(status)){
87        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
88        return;
89    }
90
91    RuleBasedBreakIterator biDefault, biDefault2;
92    if(U_FAILURE(status)){
93        errln("%s:%d FAIL : in construction of default iterator", __FILE__, __LINE__);
94        return;
95    }
96    if (biDefault == *bix) {
97        errln("%s:%d ERROR: iterators should not compare ==", __FILE__, __LINE__);
98        return;
99    }
100    if (biDefault != biDefault2) {
101        errln("%s:%d ERROR: iterators should compare ==", __FILE__, __LINE__);
102        return;
103    }
104
105
106    UnicodeString   HelloString("Hello Kitty");
107    bix->setText(HelloString);
108    if (*bix == *bi2) {
109        errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__, __LINE__);
110    }
111    *bix = *bi2;
112    if (*bix != *bi2) {
113        errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__, __LINE__);
114    }
115
116    int bixnext = bix->next();
117    int bi2next = bi2->next();
118    if (! (bixnext == bi2next && bixnext == 7)) {
119        errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__, __LINE__);
120    }
121    delete bix;
122    if (bi2->next() != 8) {
123        errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__, __LINE__);
124    }
125
126
127
128    logln((UnicodeString)"Testing clone()");
129    RuleBasedBreakIterator* bi1clone = dynamic_cast<RuleBasedBreakIterator *>(bi1->clone());
130    RuleBasedBreakIterator* bi2clone = dynamic_cast<RuleBasedBreakIterator *>(bi2->clone());
131
132    if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
133      *bi1clone == *bi3 || *bi1clone == *bi2)
134        errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__, __LINE__);
135
136    if(*bi2clone == *bi1 || *bi2clone == *biequal ||
137       *bi2clone == *bi3 || *bi2clone != *bi2)
138        errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__, __LINE__);
139
140    if(bi1->getText() != bi1clone->getText()   ||
141       bi2clone->getText() != bi2->getText()   ||
142       *bi2clone == *bi1clone )
143        errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__, __LINE__);
144
145    delete bi1clone;
146    delete bi2clone;
147    delete bi1;
148    delete bi3;
149    delete bi2;
150    delete biequal;
151}
152
153void RBBIAPITest::TestBoilerPlate()
154{
155    UErrorCode status = U_ZERO_ERROR;
156    BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
157    BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
158    if (U_FAILURE(status)) {
159        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
160        return;
161    }
162    if(*a!=*b){
163        errln("Failed: boilerplate method operator!= does not return correct results");
164    }
165    // Japanese word break iterators are identical to root with
166    // a dictionary-based break iterator
167    BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
168    BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
169    if(c && d){
170        if(*c!=*d){
171            errln("Failed: boilerplate method operator== does not return correct results");
172        }
173    }else{
174        errln("creation of break iterator failed");
175    }
176    delete a;
177    delete b;
178    delete c;
179    delete d;
180}
181
182void RBBIAPITest::TestgetRules()
183{
184    UErrorCode status=U_ZERO_ERROR;
185
186    LocalPointer<RuleBasedBreakIterator> bi1(
187            (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status), status);
188    LocalPointer<RuleBasedBreakIterator> bi2(
189            (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status), status);
190    if(U_FAILURE(status)){
191        errcheckln(status, "%s:%d, FAIL: in construction - %s", __FILE__, __LINE__, u_errorName(status));
192        return;
193    }
194
195    logln((UnicodeString)"Testing getRules()");
196
197    UnicodeString text(u"Hello there");
198    bi1->setText(text);
199
200    LocalPointer <RuleBasedBreakIterator> bi3((RuleBasedBreakIterator*)bi1->clone());
201
202    UnicodeString temp=bi1->getRules();
203    UnicodeString temp2=bi2->getRules();
204    UnicodeString temp3=bi3->getRules();
205    if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
206        errln("%s:%d ERROR: error in getRules() method", __FILE__, __LINE__);
207
208    RuleBasedBreakIterator bi4;   // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
209    if (!bi4.getRules().isEmpty()) {
210        errln("%s:%d Empty string expected.", __FILE__, __LINE__);
211    }
212}
213
214void RBBIAPITest::TestHashCode()
215{
216    UErrorCode status=U_ZERO_ERROR;
217    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
218    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
219    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
220    if(U_FAILURE(status)){
221        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
222        delete bi1;
223        delete bi2;
224        delete bi3;
225        return;
226    }
227
228
229    logln((UnicodeString)"Testing hashCode()");
230
231    bi1->setText((UnicodeString)"Hash code");
232    bi2->setText((UnicodeString)"Hash code");
233    bi3->setText((UnicodeString)"Hash code");
234
235    RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
236    RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
237
238    if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
239        bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
240        errln((UnicodeString)"ERROR: identical objects have different hashcodes");
241
242    if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
243        bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
244        errln((UnicodeString)"ERROR: different objects have same hashcodes");
245
246    delete bi1clone;
247    delete bi2clone;
248    delete bi1;
249    delete bi2;
250    delete bi3;
251
252}
253void RBBIAPITest::TestGetSetAdoptText()
254{
255    logln((UnicodeString)"Testing getText setText ");
256    IcuTestErrorCode status(*this, "TestGetSetAdoptText");
257    UnicodeString str1="first string.";
258    UnicodeString str2="Second string.";
259    LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
260    LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
261    if(status.isFailure()){
262        errcheckln(status, "Fail : in construction - %s", status.errorName());
263            return;
264    }
265
266
267    CharacterIterator* text1= new StringCharacterIterator(str1);
268    CharacterIterator* text1Clone = text1->clone();
269    CharacterIterator* text2= new StringCharacterIterator(str2);
270    CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
271
272    wordIter1->setText(str1);
273    CharacterIterator *tci = &wordIter1->getText();
274    UnicodeString      tstr;
275    tci->getText(tstr);
276    TEST_ASSERT(tstr == str1);
277    if(wordIter1->current() != 0)
278        errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
279
280    wordIter1->next(2);
281
282    wordIter1->setText(str2);
283    if(wordIter1->current() != 0)
284        errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
285
286
287    charIter1->adoptText(text1Clone);
288    TEST_ASSERT(wordIter1->getText() != charIter1->getText());
289    tci = &wordIter1->getText();
290    tci->getText(tstr);
291    TEST_ASSERT(tstr == str2);
292    tci = &charIter1->getText();
293    tci->getText(tstr);
294    TEST_ASSERT(tstr == str1);
295
296
297    LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
298    rb->adoptText(text1);
299    if(rb->getText() != *text1)
300        errln((UnicodeString)"ERROR:1 error in adoptText ");
301    rb->adoptText(text2);
302    if(rb->getText() != *text2)
303        errln((UnicodeString)"ERROR:2 error in adoptText ");
304
305    // Adopt where iterator range is less than the entire orignal source string.
306    //   (With the change of the break engine to working with UText internally,
307    //    CharacterIterators starting at positions other than zero are not supported)
308    rb->adoptText(text3);
309    TEST_ASSERT(rb->preceding(2) == 0);
310    TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
311    //if(rb->preceding(2) != 3) {
312    //    errln((UnicodeString)"ERROR:3 error in adoptText ");
313    //}
314    //if(rb->following(11) != BreakIterator::DONE) {
315    //    errln((UnicodeString)"ERROR:4 error in adoptText ");
316    //}
317
318    // UText API
319    //
320    //   Quick test to see if UText is working at all.
321    //
322    const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
323    const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
324    //                012345678901
325
326    status.reset();
327    LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
328    wordIter1->setText(ut.getAlias(), status);
329    TEST_ASSERT_SUCCESS(status);
330
331    int32_t pos;
332    pos = wordIter1->first();
333    TEST_ASSERT(pos==0);
334    pos = wordIter1->next();
335    TEST_ASSERT(pos==5);
336    pos = wordIter1->next();
337    TEST_ASSERT(pos==6);
338    pos = wordIter1->next();
339    TEST_ASSERT(pos==11);
340    pos = wordIter1->next();
341    TEST_ASSERT(pos==UBRK_DONE);
342
343    status.reset();
344    LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
345    TEST_ASSERT_SUCCESS(status);
346    wordIter1->setText(ut2.getAlias(), status);
347    TEST_ASSERT_SUCCESS(status);
348
349    pos = wordIter1->first();
350    TEST_ASSERT(pos==0);
351    pos = wordIter1->next();
352    TEST_ASSERT(pos==3);
353    pos = wordIter1->next();
354    TEST_ASSERT(pos==4);
355
356    pos = wordIter1->last();
357    TEST_ASSERT(pos==6);
358    pos = wordIter1->previous();
359    TEST_ASSERT(pos==4);
360    pos = wordIter1->previous();
361    TEST_ASSERT(pos==3);
362    pos = wordIter1->previous();
363    TEST_ASSERT(pos==0);
364    pos = wordIter1->previous();
365    TEST_ASSERT(pos==UBRK_DONE);
366
367    status.reset();
368    UnicodeString sEmpty;
369    LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
370    wordIter1->getUText(gut2.getAlias(), status);
371    TEST_ASSERT_SUCCESS(status);
372    status.reset();
373}
374
375
376void RBBIAPITest::TestIteration()
377{
378    // This test just verifies that the API is present.
379    // Testing for correct operation of the break rules happens elsewhere.
380
381    UErrorCode status=U_ZERO_ERROR;
382    RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
383    if (U_FAILURE(status) || bi == NULL)  {
384        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
385    }
386    delete bi;
387
388    status=U_ZERO_ERROR;
389    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
390    if (U_FAILURE(status) || bi == NULL)  {
391        errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
392    }
393    delete bi;
394
395    status=U_ZERO_ERROR;
396    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
397    if (U_FAILURE(status) || bi == NULL)  {
398        errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
399    }
400    delete bi;
401
402    status=U_ZERO_ERROR;
403    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
404    if (U_FAILURE(status) || bi == NULL)  {
405        errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
406    }
407    delete bi;
408
409    status=U_ZERO_ERROR;
410    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
411    if (U_FAILURE(status) || bi == NULL)  {
412        errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
413    }
414    delete bi;
415
416    status=U_ZERO_ERROR;
417    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
418    if (U_FAILURE(status) || bi == NULL)  {
419        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
420        return;   // Skip the rest of these tests.
421    }
422
423
424    UnicodeString testString="0123456789";
425    bi->setText(testString);
426
427    int32_t i;
428    i = bi->first();
429    if (i != 0) {
430        errln("%s:%d Incorrect value from bi->first().  Expected 0, got %d.", __FILE__, __LINE__, i);
431    }
432
433    i = bi->last();
434    if (i != 10) {
435        errln("%s:%d Incorrect value from bi->last().  Expected 10, got %d", __FILE__, __LINE__, i);
436    }
437
438    //
439    // Previous
440    //
441    bi->last();
442    i = bi->previous();
443    if (i != 9) {
444        errln("%s:%d Incorrect value from bi->last().  Expected 9, got %d", __FILE__, __LINE__, i);
445    }
446
447
448    bi->first();
449    i = bi->previous();
450    if (i != BreakIterator::DONE) {
451        errln("%s:%d Incorrect value from bi->previous().  Expected DONE, got %d", __FILE__, __LINE__, i);
452    }
453
454    //
455    // next()
456    //
457    bi->first();
458    i = bi->next();
459    if (i != 1) {
460        errln("%s:%d Incorrect value from bi->next().  Expected 1, got %d", __FILE__, __LINE__, i);
461    }
462
463    bi->last();
464    i = bi->next();
465    if (i != BreakIterator::DONE) {
466        errln("%s:%d Incorrect value from bi->next().  Expected DONE, got %d", __FILE__, __LINE__, i);
467    }
468
469
470    //
471    //  current()
472    //
473    bi->first();
474    i = bi->current();
475    if (i != 0) {
476        errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__, __LINE__, i);
477    }
478
479    bi->next();
480    i = bi->current();
481    if (i != 1) {
482        errln("%s:%d Incorrect value from bi->current().  Expected 1, got %d", __FILE__, __LINE__, i);
483    }
484
485    bi->last();
486    bi->next();
487    i = bi->current();
488    if (i != 10) {
489        errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__, __LINE__, i);
490    }
491
492    bi->first();
493    bi->previous();
494    i = bi->current();
495    if (i != 0) {
496        errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__, __LINE__, i);
497    }
498
499
500    //
501    // Following()
502    //
503    i = bi->following(4);
504    if (i != 5) {
505        errln("%s:%d Incorrect value from bi->following().  Expected 5, got %d", __FILE__, __LINE__, i);
506    }
507
508    i = bi->following(9);
509    if (i != 10) {
510        errln("%s:%d Incorrect value from bi->following().  Expected 10, got %d", __FILE__, __LINE__, i);
511    }
512
513    i = bi->following(10);
514    if (i != BreakIterator::DONE) {
515        errln("%s:%d Incorrect value from bi->following().  Expected DONE, got %d", __FILE__, __LINE__, i);
516    }
517
518
519    //
520    // Preceding
521    //
522    i = bi->preceding(4);
523    if (i != 3) {
524        errln("%s:%d Incorrect value from bi->preceding().  Expected 3, got %d", __FILE__, __LINE__, i);
525    }
526
527    i = bi->preceding(10);
528    if (i != 9) {
529        errln("%s:%d Incorrect value from bi->preceding().  Expected 9, got %d", __FILE__, __LINE__, i);
530    }
531
532    i = bi->preceding(1);
533    if (i != 0) {
534        errln("%s:%d Incorrect value from bi->preceding().  Expected 0, got %d", __FILE__, __LINE__, i);
535    }
536
537    i = bi->preceding(0);
538    if (i != BreakIterator::DONE) {
539        errln("%s:%d Incorrect value from bi->preceding().  Expected DONE, got %d", __FILE__, __LINE__, i);
540    }
541
542
543    //
544    // isBoundary()
545    //
546    bi->first();
547    if (bi->isBoundary(3) != TRUE) {
548        errln("%s:%d Incorrect value from bi->isBoudary().  Expected TRUE, got FALSE", __FILE__, __LINE__, i);
549    }
550    i = bi->current();
551    if (i != 3) {
552        errln("%s:%d Incorrect value from bi->current().  Expected 3, got %d", __FILE__, __LINE__, i);
553    }
554
555
556    if (bi->isBoundary(11) != FALSE) {
557        errln("%s:%d Incorrect value from bi->isBoudary().  Expected FALSE, got TRUE", __FILE__, __LINE__, i);
558    }
559    i = bi->current();
560    if (i != 10) {
561        errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__, __LINE__, i);
562    }
563
564    //
565    // next(n)
566    //
567    bi->first();
568    i = bi->next(4);
569    if (i != 4) {
570        errln("%s:%d Incorrect value from bi->next().  Expected 4, got %d", __FILE__, __LINE__, i);
571    }
572
573    i = bi->next(6);
574    if (i != 10) {
575        errln("%s:%d Incorrect value from bi->next().  Expected 10, got %d", __FILE__, __LINE__, i);
576    }
577
578    bi->first();
579    i = bi->next(11);
580    if (i != BreakIterator::DONE) {
581        errln("%s:%d Incorrect value from bi->next().  Expected BreakIterator::DONE, got %d", __FILE__, __LINE__, i);
582    }
583
584    delete bi;
585
586}
587
588
589
590
591
592
593void RBBIAPITest::TestBuilder() {
594     UnicodeString rulesString1 = "$Letters = [:L:];\n"
595                                  "$Numbers = [:N:];\n"
596                                  "$Letters+;\n"
597                                  "$Numbers+;\n"
598                                  "[^$Letters $Numbers];\n"
599                                  "!.*;\n";
600     UnicodeString testString1  = "abc123..abc";
601                                // 01234567890
602     int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
603     UErrorCode status=U_ZERO_ERROR;
604     UParseError    parseError;
605
606     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
607     if(U_FAILURE(status)) {
608         dataerrln("Fail : in construction - %s", u_errorName(status));
609     } else {
610         bi->setText(testString1);
611         doBoundaryTest(*bi, testString1, bounds1);
612     }
613     delete bi;
614}
615
616
617//
618//  TestQuoteGrouping
619//       Single quotes within rules imply a grouping, so that a modifier
620//       following the quoted text (* or +) applies to all of the quoted chars.
621//
622void RBBIAPITest::TestQuoteGrouping() {
623     UnicodeString rulesString1 = "#Here comes the rule...\n"
624                                  "'$@!'*;\n"   //  (\$\@\!)*
625                                  ".;\n";
626
627     UnicodeString testString1  = "$@!$@!X$@!!X";
628                                // 0123456789012
629     int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
630     UErrorCode status=U_ZERO_ERROR;
631     UParseError    parseError;
632
633     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
634     if(U_FAILURE(status)) {
635         dataerrln("Fail : in construction - %s", u_errorName(status));
636     } else {
637         bi->setText(testString1);
638         doBoundaryTest(*bi, testString1, bounds1);
639     }
640     delete bi;
641}
642
643//
644//  TestRuleStatus
645//      Test word break rule status constants.
646//
647void RBBIAPITest::TestRuleStatus() {
648     UChar str[30];
649     //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
650     // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
651     u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
652              // 012345678901234567  8      9    0
653              //                     Katakana
654                str, 30);
655     UnicodeString testString1(str);
656     int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
657     int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
658                          UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
659                          UBRK_WORD_IDEO,     UBRK_WORD_NONE};
660
661     int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
662                          UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
663                          UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
664
665     UErrorCode status=U_ZERO_ERROR;
666
667     BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
668     if(U_FAILURE(status)) {
669         errcheckln(status, "%s:%d Fail in construction - %s", __FILE__, __LINE__, u_errorName(status));
670     } else {
671         bi->setText(testString1);
672         // First test that the breaks are in the right spots.
673         doBoundaryTest(*bi, testString1, bounds1);
674
675         // Then go back and check tag values
676         int32_t i = 0;
677         int32_t pos, tag;
678         for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
679             if (pos != bounds1[i]) {
680                 errln("%s:%d FAIL: unexpected word break at postion %d", __FILE__, __LINE__, pos);
681                 break;
682             }
683             tag = bi->getRuleStatus();
684             if (tag < tag_lo[i] || tag >= tag_hi[i]) {
685                 errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__, __LINE__, tag, pos);
686                 break;
687             }
688
689             // Check that we get the same tag values from getRuleStatusVec()
690             int32_t vec[10];
691             int t = bi->getRuleStatusVec(vec, 10, status);
692             TEST_ASSERT_SUCCESS(status);
693             TEST_ASSERT(t==1);
694             TEST_ASSERT(vec[0] == tag);
695         }
696     }
697     delete bi;
698
699     // Now test line break status.  This test mostly is to confirm that the status constants
700     //                              are correctly declared in the header.
701     testString1 =   "test line. \n";
702     // break type    s    s     h
703
704     bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
705     if(U_FAILURE(status)) {
706         errcheckln(status, "%s:%d failed to create line break iterator. - %s", __FILE__, __LINE__, u_errorName(status));
707     } else {
708         int32_t i = 0;
709         int32_t pos, tag;
710         UBool   success;
711
712         bi->setText(testString1);
713         pos = bi->current();
714         tag = bi->getRuleStatus();
715         for (i=0; i<3; i++) {
716             switch (i) {
717             case 0:
718                 success = pos==0  && tag==UBRK_LINE_SOFT; break;
719             case 1:
720                 success = pos==5  && tag==UBRK_LINE_SOFT; break;
721             case 2:
722                 success = pos==12 && tag==UBRK_LINE_HARD; break;
723             default:
724                 success = FALSE; break;
725             }
726             if (success == FALSE) {
727                 errln("%s:%d: incorrect line break status or position.  i=%d, pos=%d, tag=%d",
728                     __FILE__, __LINE__, i, pos, tag);
729                 break;
730             }
731             pos = bi->next();
732             tag = bi->getRuleStatus();
733         }
734         if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
735             UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
736             (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
737             errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__, __LINE__);
738         }
739     }
740     delete bi;
741
742}
743
744
745//
746//  TestRuleStatusVec
747//      Test the vector form of  break rule status.
748//
749void RBBIAPITest::TestRuleStatusVec() {
750    UnicodeString rulesString(   "[A-N]{100}; \n"
751                                 "[a-w]{200}; \n"
752                                 "[\\p{L}]{300}; \n"
753                                 "[\\p{N}]{400}; \n"
754                                 "[0-5]{500}; \n"
755                                  "!.*;\n", -1, US_INV);
756     UnicodeString testString1  = "Aapz5?";
757     int32_t  statusVals[10];
758     int32_t  numStatuses;
759     int32_t  pos;
760
761     UErrorCode status=U_ZERO_ERROR;
762     UParseError    parseError;
763
764     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
765     if (U_FAILURE(status)) {
766         dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
767     } else {
768         bi->setText(testString1);
769
770         // A
771         pos = bi->next();
772         TEST_ASSERT(pos==1);
773         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
774         TEST_ASSERT_SUCCESS(status);
775         TEST_ASSERT(numStatuses == 2);
776         TEST_ASSERT(statusVals[0] == 100);
777         TEST_ASSERT(statusVals[1] == 300);
778
779         // a
780         pos = bi->next();
781         TEST_ASSERT(pos==2);
782         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
783         TEST_ASSERT_SUCCESS(status);
784         TEST_ASSERT(numStatuses == 2);
785         TEST_ASSERT(statusVals[0] == 200);
786         TEST_ASSERT(statusVals[1] == 300);
787
788         // p
789         pos = bi->next();
790         TEST_ASSERT(pos==3);
791         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
792         TEST_ASSERT_SUCCESS(status);
793         TEST_ASSERT(numStatuses == 2);
794         TEST_ASSERT(statusVals[0] == 200);
795         TEST_ASSERT(statusVals[1] == 300);
796
797         // z
798         pos = bi->next();
799         TEST_ASSERT(pos==4);
800         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
801         TEST_ASSERT_SUCCESS(status);
802         TEST_ASSERT(numStatuses == 1);
803         TEST_ASSERT(statusVals[0] == 300);
804
805         // 5
806         pos = bi->next();
807         TEST_ASSERT(pos==5);
808         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
809         TEST_ASSERT_SUCCESS(status);
810         TEST_ASSERT(numStatuses == 2);
811         TEST_ASSERT(statusVals[0] == 400);
812         TEST_ASSERT(statusVals[1] == 500);
813
814         // ?
815         pos = bi->next();
816         TEST_ASSERT(pos==6);
817         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
818         TEST_ASSERT_SUCCESS(status);
819         TEST_ASSERT(numStatuses == 1);
820         TEST_ASSERT(statusVals[0] == 0);
821
822         //
823         //  Check buffer overflow error handling.   Char == A
824         //
825         bi->first();
826         pos = bi->next();
827         TEST_ASSERT(pos==1);
828         memset(statusVals, -1, sizeof(statusVals));
829         numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
830         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
831         TEST_ASSERT(numStatuses == 2);
832         TEST_ASSERT(statusVals[0] == -1);
833
834         status = U_ZERO_ERROR;
835         memset(statusVals, -1, sizeof(statusVals));
836         numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
837         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
838         TEST_ASSERT(numStatuses == 2);
839         TEST_ASSERT(statusVals[0] == 100);
840         TEST_ASSERT(statusVals[1] == -1);
841
842         status = U_ZERO_ERROR;
843         memset(statusVals, -1, sizeof(statusVals));
844         numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
845         TEST_ASSERT_SUCCESS(status);
846         TEST_ASSERT(numStatuses == 2);
847         TEST_ASSERT(statusVals[0] == 100);
848         TEST_ASSERT(statusVals[1] == 300);
849         TEST_ASSERT(statusVals[2] == -1);
850     }
851     delete bi;
852
853}
854
855//
856//   Bug 2190 Regression test.   Builder crash on rule consisting of only a
857//                               $variable reference
858void RBBIAPITest::TestBug2190() {
859     UnicodeString rulesString1 = "$aaa = abcd;\n"
860                                  "$bbb = $aaa;\n"
861                                  "$bbb;\n";
862     UnicodeString testString1  = "abcdabcd";
863                                // 01234567890
864     int32_t bounds1[] = {0, 4, 8};
865     UErrorCode status=U_ZERO_ERROR;
866     UParseError    parseError;
867
868     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
869     if(U_FAILURE(status)) {
870         dataerrln("Fail : in construction - %s", u_errorName(status));
871     } else {
872         bi->setText(testString1);
873         doBoundaryTest(*bi, testString1, bounds1);
874     }
875     delete bi;
876}
877
878
879void RBBIAPITest::TestRegistration() {
880#if !UCONFIG_NO_SERVICE
881    UErrorCode status = U_ZERO_ERROR;
882    BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
883    // ok to not delete these if we exit because of error?
884    BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
885    BreakIterator* root_word = BreakIterator::createWordInstance("", status);
886    BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
887
888    if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
889        dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
890
891        delete ja_word;
892        delete ja_char;
893        delete root_word;
894        delete root_char;
895
896        return;
897    }
898
899    URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
900    {
901#if 0 // With a dictionary based word breaking, ja_word is identical to root.
902        if (ja_word && *ja_word == *root_word) {
903            errln("japan not different from root");
904        }
905#endif
906    }
907
908    {
909        BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
910        UBool fail = TRUE;
911        if(result){
912            fail = *result != *ja_word;
913        }
914        delete result;
915        if (fail) {
916            errln("bad result for xx_XX/word");
917        }
918    }
919
920    {
921        BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
922        UBool fail = TRUE;
923        if(result){
924            fail = *result != *ja_char;
925        }
926        delete result;
927        if (fail) {
928            errln("bad result for ja_JP/char");
929        }
930    }
931
932    {
933        BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
934        UBool fail = TRUE;
935        if(result){
936            fail = *result != *root_char;
937        }
938        delete result;
939        if (fail) {
940            errln("bad result for xx_XX/char");
941        }
942    }
943
944    {
945        StringEnumeration* avail = BreakIterator::getAvailableLocales();
946        UBool found = FALSE;
947        const UnicodeString* p;
948        while ((p = avail->snext(status))) {
949            if (p->compare("xx") == 0) {
950                found = TRUE;
951                break;
952            }
953        }
954        delete avail;
955        if (!found) {
956            errln("did not find test locale");
957        }
958    }
959
960    {
961        UBool unreg = BreakIterator::unregister(key, status);
962        if (!unreg) {
963            errln("unable to unregister");
964        }
965    }
966
967    {
968        BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
969        BreakIterator* root = BreakIterator::createWordInstance("", status);
970        UBool fail = TRUE;
971        if(root){
972          fail = *root != *result;
973        }
974        delete root;
975        delete result;
976        if (fail) {
977            errln("did not get root break");
978        }
979    }
980
981    {
982        StringEnumeration* avail = BreakIterator::getAvailableLocales();
983        UBool found = FALSE;
984        const UnicodeString* p;
985        while ((p = avail->snext(status))) {
986            if (p->compare("xx") == 0) {
987                found = TRUE;
988                break;
989            }
990        }
991        delete avail;
992        if (found) {
993            errln("found test locale");
994        }
995    }
996
997    {
998        int32_t count;
999        UBool   foundLocale = FALSE;
1000        const Locale *avail = BreakIterator::getAvailableLocales(count);
1001        for (int i=0; i<count; i++) {
1002            if (avail[i] == Locale::getEnglish()) {
1003                foundLocale = TRUE;
1004                break;
1005            }
1006        }
1007        if (foundLocale == FALSE) {
1008            errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1009        }
1010    }
1011
1012
1013    // ja_word was adopted by factory
1014    delete ja_char;
1015    delete root_word;
1016    delete root_char;
1017#endif
1018}
1019
1020void RBBIAPITest::RoundtripRule(const char *dataFile) {
1021    UErrorCode status = U_ZERO_ERROR;
1022    UParseError parseError;
1023    parseError.line = 0;
1024    parseError.offset = 0;
1025    LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1026    uint32_t length;
1027    const UChar *builtSource;
1028    const uint8_t *rbbiRules;
1029    const uint8_t *builtRules;
1030
1031    if (U_FAILURE(status)) {
1032        errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1033        return;
1034    }
1035
1036    builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1037    builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1038    RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1039    if (U_FAILURE(status)) {
1040        errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1041                __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1042        errln(UnicodeString(builtSource));
1043        return;
1044    };
1045    rbbiRules = brkItr->getBinaryRules(length);
1046    logln("Comparing \"%s\" len=%d", dataFile, length);
1047    if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1048        errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1049        return;
1050    }
1051    delete brkItr;
1052}
1053
1054void RBBIAPITest::TestRoundtripRules() {
1055    RoundtripRule("word");
1056    RoundtripRule("title");
1057    RoundtripRule("sent");
1058    RoundtripRule("line");
1059    RoundtripRule("char");
1060    if (!quick) {
1061        RoundtripRule("word_POSIX");
1062    }
1063}
1064
1065
1066// Check getBinaryRules() and construction of a break iterator from those rules.
1067
1068void RBBIAPITest::TestGetBinaryRules() {
1069    UErrorCode status=U_ZERO_ERROR;
1070    LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1071    if (U_FAILURE(status)) {
1072        dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
1073        return;
1074    }
1075    RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1076    if (rbbi == NULL) {
1077        dataerrln("FAIL: RuleBasedBreakIterator is NULL");
1078        return;
1079    }
1080
1081    // Check that the new line break iterator is nominally functional.
1082    UnicodeString helloWorld("Hello, World!");
1083    rbbi->setText(helloWorld);
1084    int n = 0;
1085    while (bi->next() != UBRK_DONE) {
1086        ++n;
1087    }
1088    TEST_ASSERT(n == 2);
1089
1090    // Extract the binary rules as a uint8_t blob.
1091    uint32_t ruleLength;
1092    const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1093    TEST_ASSERT(ruleLength > 0);
1094    TEST_ASSERT(binRules != NULL);
1095
1096    // Clone the binary rules, and create a break iterator from that.
1097    // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1098    uint8_t *clonedRules = new uint8_t[ruleLength];
1099    memcpy(clonedRules, binRules, ruleLength);
1100    RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1101    TEST_ASSERT_SUCCESS(status);
1102
1103    // Check that the cloned line break iterator is nominally alive.
1104    clonedBI.setText(helloWorld);
1105    n = 0;
1106    while (clonedBI.next() != UBRK_DONE) {
1107        ++n;
1108    }
1109    TEST_ASSERT(n == 2);
1110
1111    delete[] clonedRules;
1112}
1113
1114
1115void RBBIAPITest::TestRefreshInputText() {
1116    /*
1117     *  RefreshInput changes out the input of a Break Iterator without
1118     *    changing anything else in the iterator's state.  Used with Java JNI,
1119     *    when Java moves the underlying string storage.   This test
1120     *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1121     *    The right set of boundaries should still be found.
1122     */
1123    UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
1124    UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
1125    UErrorCode status = U_ZERO_ERROR;
1126    UText ut1 = UTEXT_INITIALIZER;
1127    UText ut2 = UTEXT_INITIALIZER;
1128    RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1129    TEST_ASSERT_SUCCESS(status);
1130
1131    utext_openUChars(&ut1, testStr, -1, &status);
1132    TEST_ASSERT_SUCCESS(status);
1133
1134    if (U_SUCCESS(status)) {
1135        bi->setText(&ut1, status);
1136        TEST_ASSERT_SUCCESS(status);
1137
1138        /* Line boundaries will occur before each letter in the original string */
1139        TEST_ASSERT(1 == bi->next());
1140        TEST_ASSERT(3 == bi->next());
1141
1142        /* Move the string, kill the original string.  */
1143        u_strcpy(movedStr, testStr);
1144        u_memset(testStr, 0x20, u_strlen(testStr));
1145        utext_openUChars(&ut2, movedStr, -1, &status);
1146        TEST_ASSERT_SUCCESS(status);
1147        RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1148        TEST_ASSERT_SUCCESS(status);
1149        TEST_ASSERT(bi == returnedBI);
1150
1151        /* Find the following matches, now working in the moved string. */
1152        TEST_ASSERT(5 == bi->next());
1153        TEST_ASSERT(7 == bi->next());
1154        TEST_ASSERT(8 == bi->next());
1155        TEST_ASSERT(UBRK_DONE == bi->next());
1156
1157        utext_close(&ut1);
1158        utext_close(&ut2);
1159    }
1160    delete bi;
1161
1162}
1163
1164#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1165static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1166  static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1167  it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1168
1169  int32_t *pos = new int32_t[ustr.length()];
1170  int32_t posCount = 0;
1171
1172  // calculate breaks up front, so we can print out
1173  // sans any debugging
1174  for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1175    pos[posCount++] = n;
1176    if(posCount>=ustr.length()) {
1177      it.errln("brk count exceeds string length!");
1178      return;
1179    }
1180  }
1181  UnicodeString out;
1182  out.append((UChar)CHSTR);
1183  int32_t prev = 0;
1184  for(int32_t i=0;i<posCount;i++) {
1185    int32_t n=pos[i];
1186    out.append(ustr.tempSubString(prev,n-prev));
1187    out.append((UChar)PILCROW);
1188    prev=n;
1189  }
1190  out.append(ustr.tempSubString(prev,ustr.length()-prev));
1191  out.append((UChar)CHEND);
1192  it.logln(out);
1193
1194  out.remove();
1195  for(int32_t i=0;i<posCount;i++) {
1196    char tmp[100];
1197    sprintf(tmp,"%d ",pos[i]);
1198    out.append(UnicodeString(tmp));
1199  }
1200  it.logln(out);
1201  delete [] pos;
1202}
1203#endif
1204
/**
 * Test FilteredBreakIteratorBuilder: wrap a sentence break iterator with
 * builders holding different suppression lists and check the offsets that
 * next() returns on a fixed English sample (plus one French sample).
 *
 * Sections, in order: empty builder; "Mr." suppressed; "Mr." and "Capt."
 * suppressed; English-locale builder with "Capt." unsuppressed; English-locale
 * builder as created; French-locale builder.
 *
 * Note: a single UErrorCode is shared by all sections and is never reset, so
 * once it holds a failure the sections guarded by U_SUCCESS(status) are skipped.
 */
void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
  UErrorCode status = U_ZERO_ERROR;
  LocalPointer<FilteredBreakIteratorBuilder> builder;
  LocalPointer<BreakIterator> baseBI;
  LocalPointer<BreakIterator> filteredBI;
  LocalPointer<BreakIterator> frenchBI;

  const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
  const UnicodeString ABBR_MR("Mr.");
  const UnicodeString ABBR_CAPT("Capt.");

  // Section 1: builder with nothing added. Breaks are expected after both
  // "Mr." occurrences and after "Capt." as well as at the real sentence ends.
  {
    logln("Constructing empty builder\n");
    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
    TEST_ASSERT_SUCCESS(status);

    logln("Constructing base BI\n");
    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
    TEST_ASSERT_SUCCESS(status);

	logln("Building new BI\n");
    // orphan() hands the base iterator pointer over to build().
    filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
    TEST_ASSERT_SUCCESS(status);

	if (U_SUCCESS(status)) {
        logln("Testing:");
        filteredBI->setText(text);
        TEST_ASSERT(20 == filteredBI->next()); // Mr.
        TEST_ASSERT(84 == filteredBI->next()); // recovered.
        TEST_ASSERT(90 == filteredBI->next()); // Capt.
        TEST_ASSERT(181 == filteredBI->next()); // Mr.
        TEST_ASSERT(278 == filteredBI->next()); // charge.
        // Rewind so prtbrks() can log the full list of breaks.
        filteredBI->first();
        prtbrks(filteredBI.getAlias(), text, *this);
    }
  }

  // Section 2: suppress "Mr." only. Also checks the return values of
  // suppressBreakAfter()/unsuppressBreakAfter() for repeated add and remove.
  {
    logln("Constructing empty builder\n");
    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
    TEST_ASSERT_SUCCESS(status);

    if (U_SUCCESS(status)) {
        logln("Adding Mr. as an exception\n");
        TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
        TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
        TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
        TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
        TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
        TEST_ASSERT_SUCCESS(status);

        logln("Constructing base BI\n");
        baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
        TEST_ASSERT_SUCCESS(status);

        logln("Building new BI\n");
        filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
        TEST_ASSERT_SUCCESS(status);

        logln("Testing:");
        filteredBI->setText(text);
        // The breaks at 20 and 181 (after "Mr.") are no longer expected.
        TEST_ASSERT(84 == filteredBI->next());
        TEST_ASSERT(90 == filteredBI->next());// Capt.
        TEST_ASSERT(278 == filteredBI->next());
        filteredBI->first();
        prtbrks(filteredBI.getAlias(), text, *this);
    }
  }


  // Section 3: suppress both "Mr." and "Capt."; only 84 and 278 are expected.
  {
    logln("Constructing empty builder\n");
    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
    TEST_ASSERT_SUCCESS(status);

    if (U_SUCCESS(status)) {
        logln("Adding Mr. and Capt as an exception\n");
        TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
        TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
        TEST_ASSERT_SUCCESS(status);

        logln("Constructing base BI\n");
        baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
        TEST_ASSERT_SUCCESS(status);

        logln("Building new BI\n");
        filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
        TEST_ASSERT_SUCCESS(status);

        logln("Testing:");
        filteredBI->setText(text);
        TEST_ASSERT(84 == filteredBI->next());
        TEST_ASSERT(278 == filteredBI->next());
        filteredBI->first();
        prtbrks(filteredBI.getAlias(), text, *this);
    }
  }


  // Section 4: builder created for the English locale, then "Capt." is
  // unsuppressed; the break at 90 (after "Capt.") is expected again.
  {
    logln("Constructing English builder\n");
    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
    TEST_ASSERT_SUCCESS(status);

    logln("Constructing base BI\n");
    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
    TEST_ASSERT_SUCCESS(status);

    if (U_SUCCESS(status)) {
        logln("unsuppressing 'Capt'");
        // Expected to return TRUE, i.e. the English builder already had "Capt.".
        TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));

        logln("Building new BI\n");
        filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
        TEST_ASSERT_SUCCESS(status);

        if(filteredBI.isValid()) {
          logln("Testing:");
          filteredBI->setText(text);
          TEST_ASSERT(84 == filteredBI->next());
          TEST_ASSERT(90 == filteredBI->next());
          TEST_ASSERT(278 == filteredBI->next());
          filteredBI->first();
          prtbrks(filteredBI.getAlias(), text, *this);
        }
    }
  }


  // Section 5: English-locale builder used as created; only 84 and 278 are
  // expected. The filteredBI built here is reused by the French section below.
  {
    logln("Constructing English builder\n");
    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
    TEST_ASSERT_SUCCESS(status);

    logln("Constructing base BI\n");
    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
    TEST_ASSERT_SUCCESS(status);

    if (U_SUCCESS(status)) {
        logln("Building new BI\n");
        filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
        TEST_ASSERT_SUCCESS(status);

        if(filteredBI.isValid()) {
          logln("Testing:");
          filteredBI->setText(text);
          TEST_ASSERT(84 == filteredBI->next());
          TEST_ASSERT(278 == filteredBI->next());
          filteredBI->first();
          prtbrks(filteredBI.getAlias(), text, *this);
        }
    }
  }

  // Section 6: French-locale builder. (An earlier note here read "reenable once
  // french is in"; the section is active.) Compares French and English
  // filtered iterators on a French sample containing "MM.".
  {
    logln("Constructing French builder");
    builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
    TEST_ASSERT_SUCCESS(status);

    logln("Constructing base BI\n");
    baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
    TEST_ASSERT_SUCCESS(status);

    if (U_SUCCESS(status)) {
        logln("Building new BI\n");
        frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
        TEST_ASSERT_SUCCESS(status);
    }

    if(frenchBI.isValid()) {
      logln("Testing:");
      UnicodeString frText("C'est MM. Duval.");
      frenchBI->setText(frText);
      // French iterator: no break after "MM.", only at the end of the text.
      TEST_ASSERT(16 == frenchBI->next());
      TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
      frenchBI->first();
      prtbrks(frenchBI.getAlias(), frText, *this);
      logln("Testing against English:");
      // filteredBI is still the English iterator built in the previous section.
      filteredBI->setText(frText);
      TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
      TEST_ASSERT(16 == filteredBI->next());
      TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
      filteredBI->first();
      prtbrks(filteredBI.getAlias(), frText, *this);

      // Verify ==
      TEST_ASSERT_TRUE(*frenchBI   == *frenchBI);
      TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
      TEST_ASSERT_TRUE(*frenchBI   != *filteredBI);
    } else {
      dataerrln("French BI: not valid.");
	}
  }

#else
  logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
#endif
}
1405
1406//---------------------------------------------
1407// runIndexedTest
1408//---------------------------------------------
1409
// Test dispatcher for this suite. Every test method is listed once through
// TESTCASE_AUTO between TESTCASE_AUTO_BEGIN and TESTCASE_AUTO_END.
// NOTE(review): the macros presumably assign indices in listing order and
// fill in `name` / run the test when `exec` is set - confirm in intltest.h.
// The `par` parameter is unused here.
void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
{
    if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
    TESTCASE_AUTO_BEGIN;
    // These tests are only listed when file I/O is available.
#if !UCONFIG_NO_FILE_IO
    TESTCASE_AUTO(TestCloneEquals);
    TESTCASE_AUTO(TestgetRules);
    TESTCASE_AUTO(TestHashCode);
    TESTCASE_AUTO(TestGetSetAdoptText);
    TESTCASE_AUTO(TestIteration);
#endif
    // Listed unconditionally.
    TESTCASE_AUTO(TestBuilder);
    TESTCASE_AUTO(TestQuoteGrouping);
    TESTCASE_AUTO(TestRuleStatusVec);
    TESTCASE_AUTO(TestBug2190);
    // Second group that is only listed when file I/O is available.
#if !UCONFIG_NO_FILE_IO
    TESTCASE_AUTO(TestRegistration);
    TESTCASE_AUTO(TestBoilerPlate);
    TESTCASE_AUTO(TestRuleStatus);
    TESTCASE_AUTO(TestRoundtripRules);
    TESTCASE_AUTO(TestGetBinaryRules);
#endif
    TESTCASE_AUTO(TestRefreshInputText);
    // This guard repeats the file-level UCONFIG_NO_BREAK_ITERATION guard.
#if !UCONFIG_NO_BREAK_ITERATION
    TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
#endif
    TESTCASE_AUTO_END;
}
1438
1439
1440//---------------------------------------------
1441//Internal subroutines
1442//---------------------------------------------
1443
1444void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1445     logln((UnicodeString)"testIsBoundary():");
1446        int32_t p = 0;
1447        UBool isB;
1448        for (int32_t i = 0; i < text.length(); i++) {
1449            isB = bi.isBoundary(i);
1450            logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1451
1452            if (i == boundaries[p]) {
1453                if (!isB)
1454                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1455                p++;
1456            }
1457            else {
1458                if (isB)
1459                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1460            }
1461        }
1462}
1463void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1464    UnicodeString selected;
1465    UnicodeString expected=CharsToUnicodeString(expectedString);
1466
1467    if(gotoffset != expectedOffset)
1468         errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1469    if(start <= gotoffset){
1470        testString.extractBetween(start, gotoffset, selected);
1471    }
1472    else{
1473        testString.extractBetween(gotoffset, start, selected);
1474    }
1475    if(selected.compare(expected) != 0)
1476         errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1477    else
1478        logln(prettify("****selected \"" + selected + "\""));
1479}
1480
1481#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1482