1/********************************************************************
2 * Copyright (c) 1999-2009, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 *   Date        Name        Description
6 *   12/14/99    Madhu        Creation.
7 *   01/12/2000  Madhu        updated for changed API
8 ********************************************************************/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/uchar.h"
15#include "intltest.h"
16#include "unicode/rbbi.h"
17#include "unicode/schriter.h"
18#include "rbbiapts.h"
19#include "rbbidata.h"
20#include "cstring.h"
21#include "ubrkimpl.h"
22#include "unicode/ustring.h"
23#include "unicode/utext.h"
24#include "cmemory.h"
25
/**
 * API tests for the RuleBasedBreakIterator class.
 */
29
30
// Reports a test failure (with file name and line number) through errln() when
// `status` holds an ICU failure code.
// The body is wrapped in do { } while (0) so that an invocation followed by ';'
// is exactly one statement and therefore stays correct inside an unbraced
// if/else branch.
#define TEST_ASSERT_SUCCESS(status) do { if (U_FAILURE(status)) { \
    errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); } } while (0)

// Reports a test failure (with file name and line number) through errln() when
// `expr` evaluates to FALSE.  Same single-statement wrapping as above.
#define TEST_ASSERT(expr) do { if ((expr)==FALSE) { \
    errln("Test Failure at file %s, line %d", __FILE__, __LINE__); } } while (0)
36
37void RBBIAPITest::TestCloneEquals()
38{
39
40    UErrorCode status=U_ZERO_ERROR;
41    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
42    RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
43    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
44    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
45    if(U_FAILURE(status)){
46        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
47        return;
48    }
49
50
51    UnicodeString testString="Testing word break iterators's clone() and equals()";
52    bi1->setText(testString);
53    bi2->setText(testString);
54    biequal->setText(testString);
55
56    bi3->setText("hello");
57
58    logln((UnicodeString)"Testing equals()");
59
60    logln((UnicodeString)"Testing == and !=");
61    UBool b = (*bi1 != *biequal);
62    b |= *bi1 == *bi2;
63    b |= *bi1 == *bi3;
64    if (b) {
65        errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
66    }
67
68    if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
69        errln((UnicodeString)"ERROR:2 RBBI's == and != operator  failed.");
70
71
72    // Quick test of RulesBasedBreakIterator assignment -
73    // Check that
74    //    two different iterators are !=
75    //    they are == after assignment
76    //    source and dest iterator produce the same next() after assignment.
77    //    deleting one doesn't disable the other.
78    logln("Testing assignment");
79    RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
80    if(U_FAILURE(status)){
81        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
82        return;
83    }
84
85    RuleBasedBreakIterator biDefault, biDefault2;
86    if(U_FAILURE(status)){
87        errln((UnicodeString)"FAIL : in construction of default iterator");
88        return;
89    }
90    if (biDefault == *bix) {
91        errln((UnicodeString)"ERROR: iterators should not compare ==");
92        return;
93    }
94    if (biDefault != biDefault2) {
95        errln((UnicodeString)"ERROR: iterators should compare ==");
96        return;
97    }
98
99
100    UnicodeString   HelloString("Hello Kitty");
101    bix->setText(HelloString);
102    if (*bix == *bi2) {
103        errln(UnicodeString("ERROR: strings should not be equal before assignment."));
104    }
105    *bix = *bi2;
106    if (*bix != *bi2) {
107        errln(UnicodeString("ERROR: strings should be equal before assignment."));
108    }
109
110    int bixnext = bix->next();
111    int bi2next = bi2->next();
112    if (! (bixnext == bi2next && bixnext == 7)) {
113        errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
114    }
115    delete bix;
116    if (bi2->next() != 8) {
117        errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
118    }
119
120
121
122    logln((UnicodeString)"Testing clone()");
123    RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
124    RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
125
126    if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
127      *bi1clone == *bi3 || *bi1clone == *bi2)
128        errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
129
130    if(*bi2clone == *bi1 || *bi2clone == *biequal ||
131       *bi2clone == *bi3 || *bi2clone != *bi2)
132        errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
133
134    if(bi1->getText() != bi1clone->getText()   ||
135       bi2clone->getText() != bi2->getText()   ||
136       *bi2clone == *bi1clone )
137        errln((UnicodeString)"ERROR: RBBI's clone() method failed");
138
139    delete bi1clone;
140    delete bi2clone;
141    delete bi1;
142    delete bi3;
143    delete bi2;
144    delete biequal;
145}
146
147void RBBIAPITest::TestBoilerPlate()
148{
149    UErrorCode status = U_ZERO_ERROR;
150    BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
151    BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
152    if (U_FAILURE(status)) {
153        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
154        return;
155    }
156    if(*a!=*b){
157        errln("Failed: boilerplate method operator!= does not return correct results");
158    }
159    // Japanese word break iteratos is identical to root with
160    // a dictionary-based break iterator, but Thai character break iterator
161    // is still different from Root.
162    BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
163    BreakIterator* d = BreakIterator::createCharacterInstance(Locale("th"),status);
164    if(c && d){
165        if(*c==*d){
166            errln("Failed: boilerplate method opertator== does not return correct results");
167        }
168    }else{
169        errln("creation of break iterator failed");
170    }
171    delete a;
172    delete b;
173    delete c;
174    delete d;
175}
176
177void RBBIAPITest::TestgetRules()
178{
179    UErrorCode status=U_ZERO_ERROR;
180
181    RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
182    RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
183    if(U_FAILURE(status)){
184        errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
185        delete bi1;
186        delete bi2;
187        return;
188    }
189
190
191
192    logln((UnicodeString)"Testing toString()");
193
194    bi1->setText((UnicodeString)"Hello there");
195
196    RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
197
198    UnicodeString temp=bi1->getRules();
199    UnicodeString temp2=bi2->getRules();
200    UnicodeString temp3=bi3->getRules();
201    if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
202        errln((UnicodeString)"ERROR: error in getRules() method");
203
204    delete bi1;
205    delete bi2;
206    delete bi3;
207}
208void RBBIAPITest::TestHashCode()
209{
210    UErrorCode status=U_ZERO_ERROR;
211    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
212    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
213    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
214    if(U_FAILURE(status)){
215        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
216        delete bi1;
217        delete bi2;
218        delete bi3;
219        return;
220    }
221
222
223    logln((UnicodeString)"Testing hashCode()");
224
225    bi1->setText((UnicodeString)"Hash code");
226    bi2->setText((UnicodeString)"Hash code");
227    bi3->setText((UnicodeString)"Hash code");
228
229    RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
230    RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
231
232    if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
233        bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
234        errln((UnicodeString)"ERROR: identical objects have different hashcodes");
235
236    if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
237        bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
238        errln((UnicodeString)"ERROR: different objects have same hashcodes");
239
240    delete bi1clone;
241    delete bi2clone;
242    delete bi1;
243    delete bi2;
244    delete bi3;
245
246}
247void RBBIAPITest::TestGetSetAdoptText()
248{
249    logln((UnicodeString)"Testing getText setText ");
250    UErrorCode status=U_ZERO_ERROR;
251    UnicodeString str1="first string.";
252    UnicodeString str2="Second string.";
253    RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
254    RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
255    if(U_FAILURE(status)){
256        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
257            return;
258    }
259
260
261    CharacterIterator* text1= new StringCharacterIterator(str1);
262    CharacterIterator* text1Clone = text1->clone();
263    CharacterIterator* text2= new StringCharacterIterator(str2);
264    CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
265
266    wordIter1->setText(str1);
267    CharacterIterator *tci = &wordIter1->getText();
268    UnicodeString      tstr;
269    tci->getText(tstr);
270    TEST_ASSERT(tstr == str1);
271    if(wordIter1->current() != 0)
272        errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
273
274    wordIter1->next(2);
275
276    wordIter1->setText(str2);
277    if(wordIter1->current() != 0)
278        errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
279
280
281    charIter1->adoptText(text1Clone);
282    TEST_ASSERT(wordIter1->getText() != charIter1->getText());
283    tci = &wordIter1->getText();
284    tci->getText(tstr);
285    TEST_ASSERT(tstr == str2);
286    tci = &charIter1->getText();
287    tci->getText(tstr);
288    TEST_ASSERT(tstr == str1);
289
290
291    RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone();
292    rb->adoptText(text1);
293    if(rb->getText() != *text1)
294        errln((UnicodeString)"ERROR:1 error in adoptText ");
295    rb->adoptText(text2);
296    if(rb->getText() != *text2)
297        errln((UnicodeString)"ERROR:2 error in adoptText ");
298
299    // Adopt where iterator range is less than the entire orignal source string.
300    //   (With the change of the break engine to working with UText internally,
301    //    CharacterIterators starting at positions other than zero are not supported)
302    rb->adoptText(text3);
303    TEST_ASSERT(rb->preceding(2) == 0);
304    TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
305    //if(rb->preceding(2) != 3) {
306    //    errln((UnicodeString)"ERROR:3 error in adoptText ");
307    //}
308    //if(rb->following(11) != BreakIterator::DONE) {
309    //    errln((UnicodeString)"ERROR:4 error in adoptText ");
310    //}
311
312    // UText API
313    //
314    //   Quick test to see if UText is working at all.
315    //
316    const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
317    const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
318    //                012345678901
319
320    status = U_ZERO_ERROR;
321    UText *ut = utext_openUTF8(NULL, s1, -1, &status);
322    wordIter1->setText(ut, status);
323    TEST_ASSERT_SUCCESS(status);
324
325    int32_t pos;
326    pos = wordIter1->first();
327    TEST_ASSERT(pos==0);
328    pos = wordIter1->next();
329    TEST_ASSERT(pos==5);
330    pos = wordIter1->next();
331    TEST_ASSERT(pos==6);
332    pos = wordIter1->next();
333    TEST_ASSERT(pos==11);
334    pos = wordIter1->next();
335    TEST_ASSERT(pos==UBRK_DONE);
336
337    status = U_ZERO_ERROR;
338    UText *ut2 = utext_openUTF8(NULL, s2, -1, &status);
339    TEST_ASSERT_SUCCESS(status);
340    wordIter1->setText(ut2, status);
341    TEST_ASSERT_SUCCESS(status);
342
343    pos = wordIter1->first();
344    TEST_ASSERT(pos==0);
345    pos = wordIter1->next();
346    TEST_ASSERT(pos==3);
347    pos = wordIter1->next();
348    TEST_ASSERT(pos==4);
349
350    pos = wordIter1->last();
351    TEST_ASSERT(pos==6);
352    pos = wordIter1->previous();
353    TEST_ASSERT(pos==4);
354    pos = wordIter1->previous();
355    TEST_ASSERT(pos==3);
356    pos = wordIter1->previous();
357    TEST_ASSERT(pos==0);
358    pos = wordIter1->previous();
359    TEST_ASSERT(pos==UBRK_DONE);
360
361    status = U_ZERO_ERROR;
362    UnicodeString sEmpty;
363    UText *gut2 = utext_openUnicodeString(NULL, &sEmpty, &status);
364    wordIter1->getUText(gut2, status);
365    TEST_ASSERT_SUCCESS(status);
366    utext_close(gut2);
367
368    utext_close(ut);
369    utext_close(ut2);
370
371    delete wordIter1;
372    delete charIter1;
373    delete rb;
374
375 }
376
377
378void RBBIAPITest::TestIteration()
379{
380    // This test just verifies that the API is present.
381    // Testing for correct operation of the break rules happens elsewhere.
382
383    UErrorCode status=U_ZERO_ERROR;
384    RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
385    if (U_FAILURE(status) || bi == NULL)  {
386        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
387    }
388    delete bi;
389
390    status=U_ZERO_ERROR;
391    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
392    if (U_FAILURE(status) || bi == NULL)  {
393        errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
394    }
395    delete bi;
396
397    status=U_ZERO_ERROR;
398    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
399    if (U_FAILURE(status) || bi == NULL)  {
400        errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
401    }
402    delete bi;
403
404    status=U_ZERO_ERROR;
405    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
406    if (U_FAILURE(status) || bi == NULL)  {
407        errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
408    }
409    delete bi;
410
411    status=U_ZERO_ERROR;
412    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
413    if (U_FAILURE(status) || bi == NULL)  {
414        errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
415    }
416    delete bi;
417
418    status=U_ZERO_ERROR;
419    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
420    if (U_FAILURE(status) || bi == NULL)  {
421        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
422        return;   // Skip the rest of these tests.
423    }
424
425
426    UnicodeString testString="0123456789";
427    bi->setText(testString);
428
429    int32_t i;
430    i = bi->first();
431    if (i != 0) {
432        errln("Incorrect value from bi->first().  Expected 0, got %d.", i);
433    }
434
435    i = bi->last();
436    if (i != 10) {
437        errln("Incorrect value from bi->last().  Expected 10, got %d", i);
438    }
439
440    //
441    // Previous
442    //
443    bi->last();
444    i = bi->previous();
445    if (i != 9) {
446        errln("Incorrect value from bi->last() at line %d.  Expected 9, got %d", __LINE__, i);
447    }
448
449
450    bi->first();
451    i = bi->previous();
452    if (i != BreakIterator::DONE) {
453        errln("Incorrect value from bi->previous() at line %d.  Expected DONE, got %d", __LINE__, i);
454    }
455
456    //
457    // next()
458    //
459    bi->first();
460    i = bi->next();
461    if (i != 1) {
462        errln("Incorrect value from bi->next() at line %d.  Expected 1, got %d", __LINE__, i);
463    }
464
465    bi->last();
466    i = bi->next();
467    if (i != BreakIterator::DONE) {
468        errln("Incorrect value from bi->next() at line %d.  Expected DONE, got %d", __LINE__, i);
469    }
470
471
472    //
473    //  current()
474    //
475    bi->first();
476    i = bi->current();
477    if (i != 0) {
478        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
479    }
480
481    bi->next();
482    i = bi->current();
483    if (i != 1) {
484        errln("Incorrect value from bi->previous() at line %d.  Expected 1, got %d", __LINE__, i);
485    }
486
487    bi->last();
488    bi->next();
489    i = bi->current();
490    if (i != 10) {
491        errln("Incorrect value from bi->previous() at line %d.  Expected 10, got %d", __LINE__, i);
492    }
493
494    bi->first();
495    bi->previous();
496    i = bi->current();
497    if (i != 0) {
498        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
499    }
500
501
502    //
503    // Following()
504    //
505    i = bi->following(4);
506    if (i != 5) {
507        errln("Incorrect value from bi->following() at line %d.  Expected 5, got %d", __LINE__, i);
508    }
509
510    i = bi->following(9);
511    if (i != 10) {
512        errln("Incorrect value from bi->following() at line %d.  Expected 10, got %d", __LINE__, i);
513    }
514
515    i = bi->following(10);
516    if (i != BreakIterator::DONE) {
517        errln("Incorrect value from bi->following() at line %d.  Expected DONE, got %d", __LINE__, i);
518    }
519
520
521    //
522    // Preceding
523    //
524    i = bi->preceding(4);
525    if (i != 3) {
526        errln("Incorrect value from bi->preceding() at line %d.  Expected 3, got %d", __LINE__, i);
527    }
528
529    i = bi->preceding(10);
530    if (i != 9) {
531        errln("Incorrect value from bi->preceding() at line %d.  Expected 9, got %d", __LINE__, i);
532    }
533
534    i = bi->preceding(1);
535    if (i != 0) {
536        errln("Incorrect value from bi->preceding() at line %d.  Expected 0, got %d", __LINE__, i);
537    }
538
539    i = bi->preceding(0);
540    if (i != BreakIterator::DONE) {
541        errln("Incorrect value from bi->preceding() at line %d.  Expected DONE, got %d", __LINE__, i);
542    }
543
544
545    //
546    // isBoundary()
547    //
548    bi->first();
549    if (bi->isBoundary(3) != TRUE) {
550        errln("Incorrect value from bi->isBoudary() at line %d.  Expected TRUE, got FALSE", __LINE__, i);
551    }
552    i = bi->current();
553    if (i != 3) {
554        errln("Incorrect value from bi->current() at line %d.  Expected 3, got %d", __LINE__, i);
555    }
556
557
558    if (bi->isBoundary(11) != FALSE) {
559        errln("Incorrect value from bi->isBoudary() at line %d.  Expected FALSE, got TRUE", __LINE__, i);
560    }
561    i = bi->current();
562    if (i != 10) {
563        errln("Incorrect value from bi->current() at line %d.  Expected 10, got %d", __LINE__, i);
564    }
565
566    //
567    // next(n)
568    //
569    bi->first();
570    i = bi->next(4);
571    if (i != 4) {
572        errln("Incorrect value from bi->next() at line %d.  Expected 4, got %d", __LINE__, i);
573    }
574
575    i = bi->next(6);
576    if (i != 10) {
577        errln("Incorrect value from bi->next() at line %d.  Expected 10, got %d", __LINE__, i);
578    }
579
580    bi->first();
581    i = bi->next(11);
582    if (i != BreakIterator::DONE) {
583        errln("Incorrect value from bi->next() at line %d.  Expected BreakIterator::DONE, got %d", __LINE__, i);
584    }
585
586    delete bi;
587
588}
589
590
591
592
593
594
595void RBBIAPITest::TestBuilder() {
596     UnicodeString rulesString1 = "$Letters = [:L:];\n"
597                                  "$Numbers = [:N:];\n"
598                                  "$Letters+;\n"
599                                  "$Numbers+;\n"
600                                  "[^$Letters $Numbers];\n"
601                                  "!.*;\n";
602     UnicodeString testString1  = "abc123..abc";
603                                // 01234567890
604     int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
605     UErrorCode status=U_ZERO_ERROR;
606     UParseError    parseError;
607
608     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
609     if(U_FAILURE(status)) {
610         dataerrln("Fail : in construction - %s", u_errorName(status));
611     } else {
612         bi->setText(testString1);
613         doBoundaryTest(*bi, testString1, bounds1);
614     }
615     delete bi;
616}
617
618
619//
620//  TestQuoteGrouping
621//       Single quotes within rules imply a grouping, so that a modifier
622//       following the quoted text (* or +) applies to all of the quoted chars.
623//
624void RBBIAPITest::TestQuoteGrouping() {
625     UnicodeString rulesString1 = "#Here comes the rule...\n"
626                                  "'$@!'*;\n"   //  (\$\@\!)*
627                                  ".;\n";
628
629     UnicodeString testString1  = "$@!$@!X$@!!X";
630                                // 0123456789012
631     int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
632     UErrorCode status=U_ZERO_ERROR;
633     UParseError    parseError;
634
635     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
636     if(U_FAILURE(status)) {
637         dataerrln("Fail : in construction - %s", u_errorName(status));
638     } else {
639         bi->setText(testString1);
640         doBoundaryTest(*bi, testString1, bounds1);
641     }
642     delete bi;
643}
644
645//
646//  TestRuleStatus
647//      Test word break rule status constants.
648//
649void RBBIAPITest::TestRuleStatus() {
650     UChar str[30];
651     //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
652     // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
653     u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
654              // 012345678901234567  8      9    0
655              //                     Katakana
656                str, 30);
657     UnicodeString testString1(str);
658     int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
659     int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
660                          UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
661                          UBRK_WORD_IDEO,     UBRK_WORD_NONE};
662
663     int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
664                          UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
665                          UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
666
667     UErrorCode status=U_ZERO_ERROR;
668
669     RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
670     if(U_FAILURE(status)) {
671         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
672     } else {
673         bi->setText(testString1);
674         // First test that the breaks are in the right spots.
675         doBoundaryTest(*bi, testString1, bounds1);
676
677         // Then go back and check tag values
678         int32_t i = 0;
679         int32_t pos, tag;
680         for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
681             if (pos != bounds1[i]) {
682                 errln("FAIL: unexpected word break at postion %d", pos);
683                 break;
684             }
685             tag = bi->getRuleStatus();
686             if (tag < tag_lo[i] || tag >= tag_hi[i]) {
687                 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
688                 break;
689             }
690
691             // Check that we get the same tag values from getRuleStatusVec()
692             int32_t vec[10];
693             int t = bi->getRuleStatusVec(vec, 10, status);
694             TEST_ASSERT_SUCCESS(status);
695             TEST_ASSERT(t==1);
696             TEST_ASSERT(vec[0] == tag);
697         }
698     }
699     delete bi;
700
701     // Now test line break status.  This test mostly is to confirm that the status constants
702     //                              are correctly declared in the header.
703     testString1 =   "test line. \n";
704     // break type    s    s     h
705
706     bi = (RuleBasedBreakIterator *)
707         BreakIterator::createLineInstance(Locale::getEnglish(), status);
708     if(U_FAILURE(status)) {
709         errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
710     } else {
711         int32_t i = 0;
712         int32_t pos, tag;
713         UBool   success;
714
715         bi->setText(testString1);
716         pos = bi->current();
717         tag = bi->getRuleStatus();
718         for (i=0; i<3; i++) {
719             switch (i) {
720             case 0:
721                 success = pos==0  && tag==UBRK_LINE_SOFT; break;
722             case 1:
723                 success = pos==5  && tag==UBRK_LINE_SOFT; break;
724             case 2:
725                 success = pos==12 && tag==UBRK_LINE_HARD; break;
726             default:
727                 success = FALSE; break;
728             }
729             if (success == FALSE) {
730                 errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
731                     i, pos, tag);
732                 break;
733             }
734             pos = bi->next();
735             tag = bi->getRuleStatus();
736         }
737         if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
738             UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
739             UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) {
740             errln("UBRK_LINE_* constants from header are inconsistent.");
741         }
742     }
743     delete bi;
744
745}
746
747
748//
749//  TestRuleStatusVec
750//      Test the vector form of  break rule status.
751//
752void RBBIAPITest::TestRuleStatusVec() {
753    UnicodeString rulesString(   "[A-N]{100}; \n"
754                                 "[a-w]{200}; \n"
755                                 "[\\p{L}]{300}; \n"
756                                 "[\\p{N}]{400}; \n"
757                                 "[0-5]{500}; \n"
758                                  "!.*;\n", -1, US_INV);
759     UnicodeString testString1  = "Aapz5?";
760     int32_t  statusVals[10];
761     int32_t  numStatuses;
762     int32_t  pos;
763
764     UErrorCode status=U_ZERO_ERROR;
765     UParseError    parseError;
766
767     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
768     if (U_FAILURE(status)) {
769         dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
770     } else {
771         bi->setText(testString1);
772
773         // A
774         pos = bi->next();
775         TEST_ASSERT(pos==1);
776         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
777         TEST_ASSERT_SUCCESS(status);
778         TEST_ASSERT(numStatuses == 2);
779         TEST_ASSERT(statusVals[0] == 100);
780         TEST_ASSERT(statusVals[1] == 300);
781
782         // a
783         pos = bi->next();
784         TEST_ASSERT(pos==2);
785         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
786         TEST_ASSERT_SUCCESS(status);
787         TEST_ASSERT(numStatuses == 2);
788         TEST_ASSERT(statusVals[0] == 200);
789         TEST_ASSERT(statusVals[1] == 300);
790
791         // p
792         pos = bi->next();
793         TEST_ASSERT(pos==3);
794         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
795         TEST_ASSERT_SUCCESS(status);
796         TEST_ASSERT(numStatuses == 2);
797         TEST_ASSERT(statusVals[0] == 200);
798         TEST_ASSERT(statusVals[1] == 300);
799
800         // z
801         pos = bi->next();
802         TEST_ASSERT(pos==4);
803         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
804         TEST_ASSERT_SUCCESS(status);
805         TEST_ASSERT(numStatuses == 1);
806         TEST_ASSERT(statusVals[0] == 300);
807
808         // 5
809         pos = bi->next();
810         TEST_ASSERT(pos==5);
811         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
812         TEST_ASSERT_SUCCESS(status);
813         TEST_ASSERT(numStatuses == 2);
814         TEST_ASSERT(statusVals[0] == 400);
815         TEST_ASSERT(statusVals[1] == 500);
816
817         // ?
818         pos = bi->next();
819         TEST_ASSERT(pos==6);
820         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
821         TEST_ASSERT_SUCCESS(status);
822         TEST_ASSERT(numStatuses == 1);
823         TEST_ASSERT(statusVals[0] == 0);
824
825         //
826         //  Check buffer overflow error handling.   Char == A
827         //
828         bi->first();
829         pos = bi->next();
830         TEST_ASSERT(pos==1);
831         memset(statusVals, -1, sizeof(statusVals));
832         numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
833         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
834         TEST_ASSERT(numStatuses == 2);
835         TEST_ASSERT(statusVals[0] == -1);
836
837         status = U_ZERO_ERROR;
838         memset(statusVals, -1, sizeof(statusVals));
839         numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
840         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
841         TEST_ASSERT(numStatuses == 2);
842         TEST_ASSERT(statusVals[0] == 100);
843         TEST_ASSERT(statusVals[1] == -1);
844
845         status = U_ZERO_ERROR;
846         memset(statusVals, -1, sizeof(statusVals));
847         numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
848         TEST_ASSERT_SUCCESS(status);
849         TEST_ASSERT(numStatuses == 2);
850         TEST_ASSERT(statusVals[0] == 100);
851         TEST_ASSERT(statusVals[1] == 300);
852         TEST_ASSERT(statusVals[2] == -1);
853     }
854     delete bi;
855
856}
857
858//
859//   Bug 2190 Regression test.   Builder crash on rule consisting of only a
860//                               $variable reference
861void RBBIAPITest::TestBug2190() {
862     UnicodeString rulesString1 = "$aaa = abcd;\n"
863                                  "$bbb = $aaa;\n"
864                                  "$bbb;\n";
865     UnicodeString testString1  = "abcdabcd";
866                                // 01234567890
867     int32_t bounds1[] = {0, 4, 8};
868     UErrorCode status=U_ZERO_ERROR;
869     UParseError    parseError;
870
871     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
872     if(U_FAILURE(status)) {
873         dataerrln("Fail : in construction - %s", u_errorName(status));
874     } else {
875         bi->setText(testString1);
876         doBoundaryTest(*bi, testString1, bounds1);
877     }
878     delete bi;
879}
880
881
882void RBBIAPITest::TestRegistration() {
883#if !UCONFIG_NO_SERVICE
884    UErrorCode status = U_ZERO_ERROR;
885    BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
886
887    // ok to not delete these if we exit because of error?
888    BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
889    BreakIterator* root_word = BreakIterator::createWordInstance("", status);
890    BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
891
892    if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
893        dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
894        delete ja_word;
895        delete ja_char;
896        delete root_word;
897        delete root_char;
898
899        return;
900    }
901
902    URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
903    {
904#if 0 // With a dictionary based word breaking, ja_word is identical to root.
905        if (ja_word && *ja_word == *root_word) {
906            errln("japan not different from root");
907        }
908#endif
909    }
910
911    {
912        BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
913        UBool fail = TRUE;
914        if(result){
915            fail = *result != *ja_word;
916        }
917        delete result;
918        if (fail) {
919            errln("bad result for xx_XX/word");
920        }
921    }
922
923    {
924        BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
925        UBool fail = TRUE;
926        if(result){
927            fail = *result != *ja_char;
928        }
929        delete result;
930        if (fail) {
931            errln("bad result for ja_JP/char");
932        }
933    }
934
935    {
936        BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
937        UBool fail = TRUE;
938        if(result){
939            fail = *result != *root_char;
940        }
941        delete result;
942        if (fail) {
943            errln("bad result for xx_XX/char");
944        }
945    }
946
947    {
948        StringEnumeration* avail = BreakIterator::getAvailableLocales();
949        UBool found = FALSE;
950        const UnicodeString* p;
951        while ((p = avail->snext(status))) {
952            if (p->compare("xx") == 0) {
953                found = TRUE;
954                break;
955            }
956        }
957        delete avail;
958        if (!found) {
959            errln("did not find test locale");
960        }
961    }
962
963    {
964        UBool unreg = BreakIterator::unregister(key, status);
965        if (!unreg) {
966            errln("unable to unregister");
967        }
968    }
969
970    {
971        BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
972        BreakIterator* root = BreakIterator::createWordInstance("", status);
973        UBool fail = TRUE;
974        if(root){
975          fail = *root != *result;
976        }
977        delete root;
978        delete result;
979        if (fail) {
980            errln("did not get root break");
981        }
982    }
983
984    {
985        StringEnumeration* avail = BreakIterator::getAvailableLocales();
986        UBool found = FALSE;
987        const UnicodeString* p;
988        while ((p = avail->snext(status))) {
989            if (p->compare("xx") == 0) {
990                found = TRUE;
991                break;
992            }
993        }
994        delete avail;
995        if (found) {
996            errln("found test locale");
997        }
998    }
999
1000    {
1001        int32_t count;
1002        UBool   foundLocale = FALSE;
1003        const Locale *avail = BreakIterator::getAvailableLocales(count);
1004        for (int i=0; i<count; i++) {
1005            if (avail[i] == Locale::getEnglish()) {
1006                foundLocale = TRUE;
1007                break;
1008            }
1009        }
1010        if (foundLocale == FALSE) {
1011            errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1012        }
1013    }
1014
1015
1016    // ja_word was adopted by factory
1017    delete ja_char;
1018    delete root_word;
1019    delete root_char;
1020#endif
1021}
1022
1023void RBBIAPITest::RoundtripRule(const char *dataFile) {
1024    UErrorCode status = U_ZERO_ERROR;
1025    UParseError parseError;
1026    parseError.line = 0;
1027    parseError.offset = 0;
1028    UDataMemory *data = udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status);
1029    uint32_t length;
1030    const UChar *builtSource;
1031    const uint8_t *rbbiRules;
1032    const uint8_t *builtRules;
1033
1034    if (U_FAILURE(status)) {
1035        errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status));
1036        return;
1037    }
1038
1039    builtRules = (const uint8_t *)udata_getMemory(data);
1040    builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1041    RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1042    if (U_FAILURE(status)) {
1043        errln("createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1044                u_errorName(status), parseError.line, parseError.offset);
1045        return;
1046    };
1047    rbbiRules = brkItr->getBinaryRules(length);
1048    logln("Comparing \"%s\" len=%d", dataFile, length);
1049    if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1050        errln("Built rules and rebuilt rules are different %s", dataFile);
1051        return;
1052    }
1053    delete brkItr;
1054    udata_close(data);
1055}
1056
1057void RBBIAPITest::TestRoundtripRules() {
1058    RoundtripRule("word");
1059    RoundtripRule("title");
1060    RoundtripRule("sent");
1061    RoundtripRule("line");
1062    RoundtripRule("char");
1063    if (!quick) {
1064        RoundtripRule("word_ja");
1065        RoundtripRule("word_POSIX");
1066    }
1067}
1068
1069// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1070// (these are protected so we access them via a local class RBBIWithProtectedFunctions).
1071// This is just a sanity check, not a thorough test (e.g. we don't check that the
1072// first delete actually frees rulesCopy).
1073void RBBIAPITest::TestCreateFromRBBIData() {
1074    // Get some handy RBBIData
1075    const char *brkName = "word"; // or "sent", "line", "char", etc.
1076    UErrorCode status = U_ZERO_ERROR;
1077    UDataMemory * data = udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status);
1078    if ( U_SUCCESS(status) ) {
1079        const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data);
1080        uint32_t length = builtRules->fLength;
1081        RBBIWithProtectedFunctions * brkItr;
1082
1083        // Try the memory-adopting constructor, need to copy the data first
1084        RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
1085        if ( rulesCopy ) {
1086            uprv_memcpy( rulesCopy, builtRules, length );
1087
1088            brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
1089            if ( U_SUCCESS(status) ) {
1090                delete brkItr; // this should free rulesCopy
1091            } else {
1092                errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
1093                status = U_ZERO_ERROR;// reset for the next test
1094                uprv_free( rulesCopy );
1095            }
1096        }
1097
1098        // Now try the non-adopting constructor
1099        brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
1100        if ( U_SUCCESS(status) ) {
1101            delete brkItr; // this should NOT attempt to free builtRules
1102            if (builtRules->fLength != length) { // sanity check
1103                errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
1104            }
1105        } else {
1106            errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
1107        }
1108
1109        udata_close(data);
1110    }
1111}
1112
1113//---------------------------------------------
1114// runIndexedTest
1115//---------------------------------------------
1116
1117void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1118{
1119    if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1120    switch (index) {
1121     //   case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1122        case  0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1123        case  1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1124        case  2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1125        case  3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1126        case  4: name = "TestIteration"; if (exec) TestIteration(); break;
1127        case  5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1128        case  6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
1129        case  7: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1130        case  8: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1131        case  9: name = "TestBug2190"; if (exec) TestBug2190(); break;
1132        case 10: name = "TestRegistration"; if (exec) TestRegistration(); break;
1133        case 11: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1134        case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1135        case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
1136
1137        default: name = ""; break; // needed to end loop
1138    }
1139}
1140
1141//---------------------------------------------
1142//Internal subroutines
1143//---------------------------------------------
1144
1145void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1146     logln((UnicodeString)"testIsBoundary():");
1147        int32_t p = 0;
1148        UBool isB;
1149        for (int32_t i = 0; i < text.length(); i++) {
1150            isB = bi.isBoundary(i);
1151            logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1152
1153            if (i == boundaries[p]) {
1154                if (!isB)
1155                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1156                p++;
1157            }
1158            else {
1159                if (isB)
1160                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1161            }
1162        }
1163}
1164void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1165    UnicodeString selected;
1166    UnicodeString expected=CharsToUnicodeString(expectedString);
1167
1168    if(gotoffset != expectedOffset)
1169         errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1170    if(start <= gotoffset){
1171        testString.extractBetween(start, gotoffset, selected);
1172    }
1173    else{
1174        testString.extractBetween(gotoffset, start, selected);
1175    }
1176    if(selected.compare(expected) != 0)
1177         errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1178    else
1179        logln(prettify("****selected \"" + selected + "\""));
1180}
1181
1182//---------------------------------------------
1183//RBBIWithProtectedFunctions class functions
1184//---------------------------------------------
1185
// Forwards to the RuleBasedBreakIterator(RBBIDataHeader*, UErrorCode&)
// constructor so that the test code can invoke it. TestCreateFromRBBIData
// uses this as the memory-adopting constructor.
RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
    : RuleBasedBreakIterator(data, status)
{
}
1190
// Forwards to the RuleBasedBreakIterator constructor that takes a const
// RBBIDataHeader* plus the kDontAdopt tag, so that the test code can invoke it.
// The unnamed enum parameter only selects this overload; its value is not used.
RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
    : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
{
}
1195
1196#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1197