rbbiapts.cpp revision 6d5deb12725f146643d443090dfa11b206df528a
1/********************************************************************
2 * Copyright (c) 1999-2009, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 *   Date        Name        Description
6 *   12/14/99    Madhu        Creation.
7 *   01/12/2000  Madhu        updated for changed API
8 ********************************************************************/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/uchar.h"
15#include "intltest.h"
16#include "unicode/rbbi.h"
17#include "unicode/schriter.h"
18#include "rbbiapts.h"
19#include "rbbidata.h"
20#include "cstring.h"
21#include "ubrkimpl.h"
22#include "unicode/ustring.h"
23#include "unicode/utext.h"
24#include "cmemory.h"
25
26/**
27 * API Test the RuleBasedBreakIterator class
28 */
29
30
// Report a test failure (file, line and ICU error name) when `status` holds a
// failure code.  Expands to a braced block that calls errln(), so errln() must
// be in scope at the point of use.  The macro itself contains no return
// statement, so control falls through to the statement after it.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}

// Report a test failure (file and line) when `expr` compares equal to FALSE.
// Like TEST_ASSERT_SUCCESS it reports through errln() and contains no return.
#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
errln("Test Failure at file %s, line %d", __FILE__, __LINE__);}}
36
37void RBBIAPITest::TestCloneEquals()
38{
39
40    UErrorCode status=U_ZERO_ERROR;
41    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
42    RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
43    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
44    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
45    if(U_FAILURE(status)){
46        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
47        return;
48    }
49
50
51    UnicodeString testString="Testing word break iterators's clone() and equals()";
52    bi1->setText(testString);
53    bi2->setText(testString);
54    biequal->setText(testString);
55
56    bi3->setText("hello");
57
58    logln((UnicodeString)"Testing equals()");
59
60    logln((UnicodeString)"Testing == and !=");
61    UBool b = (*bi1 != *biequal);
62    b |= *bi1 == *bi2;
63    b |= *bi1 == *bi3;
64    if (b) {
65        errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
66    }
67
68    if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
69        errln((UnicodeString)"ERROR:2 RBBI's == and != operator  failed.");
70
71
72    // Quick test of RulesBasedBreakIterator assignment -
73    // Check that
74    //    two different iterators are !=
75    //    they are == after assignment
76    //    source and dest iterator produce the same next() after assignment.
77    //    deleting one doesn't disable the other.
78    logln("Testing assignment");
79    RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
80    if(U_FAILURE(status)){
81        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
82        return;
83    }
84
85    RuleBasedBreakIterator biDefault, biDefault2;
86    if(U_FAILURE(status)){
87        errln((UnicodeString)"FAIL : in construction of default iterator");
88        return;
89    }
90    if (biDefault == *bix) {
91        errln((UnicodeString)"ERROR: iterators should not compare ==");
92        return;
93    }
94    if (biDefault != biDefault2) {
95        errln((UnicodeString)"ERROR: iterators should compare ==");
96        return;
97    }
98
99
100    UnicodeString   HelloString("Hello Kitty");
101    bix->setText(HelloString);
102    if (*bix == *bi2) {
103        errln(UnicodeString("ERROR: strings should not be equal before assignment."));
104    }
105    *bix = *bi2;
106    if (*bix != *bi2) {
107        errln(UnicodeString("ERROR: strings should be equal before assignment."));
108    }
109
110    int bixnext = bix->next();
111    int bi2next = bi2->next();
112    if (! (bixnext == bi2next && bixnext == 7)) {
113        errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
114    }
115    delete bix;
116    if (bi2->next() != 8) {
117        errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
118    }
119
120
121
122    logln((UnicodeString)"Testing clone()");
123    RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
124    RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
125
126    if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
127      *bi1clone == *bi3 || *bi1clone == *bi2)
128        errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
129
130    if(*bi2clone == *bi1 || *bi2clone == *biequal ||
131       *bi2clone == *bi3 || *bi2clone != *bi2)
132        errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
133
134    if(bi1->getText() != bi1clone->getText()   ||
135       bi2clone->getText() != bi2->getText()   ||
136       *bi2clone == *bi1clone )
137        errln((UnicodeString)"ERROR: RBBI's clone() method failed");
138
139    delete bi1clone;
140    delete bi2clone;
141    delete bi1;
142    delete bi3;
143    delete bi2;
144    delete biequal;
145}
146
147void RBBIAPITest::TestBoilerPlate()
148{
149    UErrorCode status = U_ZERO_ERROR;
150    BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
151    BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
152    if (U_FAILURE(status)) {
153        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
154        return;
155    }
156    if(*a!=*b){
157        errln("Failed: boilerplate method operator!= does not return correct results");
158    }
159    BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status);
160    if(a && c){
161        if(*c==*a){
162            errln("Failed: boilerplate method opertator== does not return correct results");
163        }
164    }else{
165        errln("creation of break iterator failed");
166    }
167    delete a;
168    delete b;
169    delete c;
170}
171
172void RBBIAPITest::TestgetRules()
173{
174    UErrorCode status=U_ZERO_ERROR;
175
176    RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
177    RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
178    if(U_FAILURE(status)){
179        errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
180        delete bi1;
181        delete bi2;
182        return;
183    }
184
185
186
187    logln((UnicodeString)"Testing toString()");
188
189    bi1->setText((UnicodeString)"Hello there");
190
191    RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
192
193    UnicodeString temp=bi1->getRules();
194    UnicodeString temp2=bi2->getRules();
195    UnicodeString temp3=bi3->getRules();
196    if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
197        errln((UnicodeString)"ERROR: error in getRules() method");
198
199    delete bi1;
200    delete bi2;
201    delete bi3;
202}
203void RBBIAPITest::TestHashCode()
204{
205    UErrorCode status=U_ZERO_ERROR;
206    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
207    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
208    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
209    if(U_FAILURE(status)){
210        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
211        delete bi1;
212        delete bi2;
213        delete bi3;
214        return;
215    }
216
217
218    logln((UnicodeString)"Testing hashCode()");
219
220    bi1->setText((UnicodeString)"Hash code");
221    bi2->setText((UnicodeString)"Hash code");
222    bi3->setText((UnicodeString)"Hash code");
223
224    RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
225    RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
226
227    if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
228        bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
229        errln((UnicodeString)"ERROR: identical objects have different hashcodes");
230
231    if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
232        bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
233        errln((UnicodeString)"ERROR: different objects have same hashcodes");
234
235    delete bi1clone;
236    delete bi2clone;
237    delete bi1;
238    delete bi2;
239    delete bi3;
240
241}
242void RBBIAPITest::TestGetSetAdoptText()
243{
244    logln((UnicodeString)"Testing getText setText ");
245    UErrorCode status=U_ZERO_ERROR;
246    UnicodeString str1="first string.";
247    UnicodeString str2="Second string.";
248    RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
249    RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
250    if(U_FAILURE(status)){
251        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
252            return;
253    }
254
255
256    CharacterIterator* text1= new StringCharacterIterator(str1);
257    CharacterIterator* text1Clone = text1->clone();
258    CharacterIterator* text2= new StringCharacterIterator(str2);
259    CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
260
261    wordIter1->setText(str1);
262    CharacterIterator *tci = &wordIter1->getText();
263    UnicodeString      tstr;
264    tci->getText(tstr);
265    TEST_ASSERT(tstr == str1);
266    if(wordIter1->current() != 0)
267        errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
268
269    wordIter1->next(2);
270
271    wordIter1->setText(str2);
272    if(wordIter1->current() != 0)
273        errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
274
275
276    charIter1->adoptText(text1Clone);
277    TEST_ASSERT(wordIter1->getText() != charIter1->getText());
278    tci = &wordIter1->getText();
279    tci->getText(tstr);
280    TEST_ASSERT(tstr == str2);
281    tci = &charIter1->getText();
282    tci->getText(tstr);
283    TEST_ASSERT(tstr == str1);
284
285
286    RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone();
287    rb->adoptText(text1);
288    if(rb->getText() != *text1)
289        errln((UnicodeString)"ERROR:1 error in adoptText ");
290    rb->adoptText(text2);
291    if(rb->getText() != *text2)
292        errln((UnicodeString)"ERROR:2 error in adoptText ");
293
294    // Adopt where iterator range is less than the entire orignal source string.
295    //   (With the change of the break engine to working with UText internally,
296    //    CharacterIterators starting at positions other than zero are not supported)
297    rb->adoptText(text3);
298    TEST_ASSERT(rb->preceding(2) == 0);
299    TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
300    //if(rb->preceding(2) != 3) {
301    //    errln((UnicodeString)"ERROR:3 error in adoptText ");
302    //}
303    //if(rb->following(11) != BreakIterator::DONE) {
304    //    errln((UnicodeString)"ERROR:4 error in adoptText ");
305    //}
306
307    // UText API
308    //
309    //   Quick test to see if UText is working at all.
310    //
311    const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
312    const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
313    //                012345678901
314
315    status = U_ZERO_ERROR;
316    UText *ut = utext_openUTF8(NULL, s1, -1, &status);
317    wordIter1->setText(ut, status);
318    TEST_ASSERT_SUCCESS(status);
319
320    int32_t pos;
321    pos = wordIter1->first();
322    TEST_ASSERT(pos==0);
323    pos = wordIter1->next();
324    TEST_ASSERT(pos==5);
325    pos = wordIter1->next();
326    TEST_ASSERT(pos==6);
327    pos = wordIter1->next();
328    TEST_ASSERT(pos==11);
329    pos = wordIter1->next();
330    TEST_ASSERT(pos==UBRK_DONE);
331
332    status = U_ZERO_ERROR;
333    UText *ut2 = utext_openUTF8(NULL, s2, -1, &status);
334    TEST_ASSERT_SUCCESS(status);
335    wordIter1->setText(ut2, status);
336    TEST_ASSERT_SUCCESS(status);
337
338    pos = wordIter1->first();
339    TEST_ASSERT(pos==0);
340    pos = wordIter1->next();
341    TEST_ASSERT(pos==3);
342    pos = wordIter1->next();
343    TEST_ASSERT(pos==4);
344
345    pos = wordIter1->last();
346    TEST_ASSERT(pos==6);
347    pos = wordIter1->previous();
348    TEST_ASSERT(pos==4);
349    pos = wordIter1->previous();
350    TEST_ASSERT(pos==3);
351    pos = wordIter1->previous();
352    TEST_ASSERT(pos==0);
353    pos = wordIter1->previous();
354    TEST_ASSERT(pos==UBRK_DONE);
355
356    status = U_ZERO_ERROR;
357    UnicodeString sEmpty;
358    UText *gut2 = utext_openUnicodeString(NULL, &sEmpty, &status);
359    wordIter1->getUText(gut2, status);
360    TEST_ASSERT_SUCCESS(status);
361    utext_close(gut2);
362
363    utext_close(ut);
364    utext_close(ut2);
365
366    delete wordIter1;
367    delete charIter1;
368    delete rb;
369
370 }
371
372
373void RBBIAPITest::TestIteration()
374{
375    // This test just verifies that the API is present.
376    // Testing for correct operation of the break rules happens elsewhere.
377
378    UErrorCode status=U_ZERO_ERROR;
379    RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
380    if (U_FAILURE(status) || bi == NULL)  {
381        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
382    }
383    delete bi;
384
385    status=U_ZERO_ERROR;
386    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
387    if (U_FAILURE(status) || bi == NULL)  {
388        errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
389    }
390    delete bi;
391
392    status=U_ZERO_ERROR;
393    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
394    if (U_FAILURE(status) || bi == NULL)  {
395        errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
396    }
397    delete bi;
398
399    status=U_ZERO_ERROR;
400    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
401    if (U_FAILURE(status) || bi == NULL)  {
402        errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
403    }
404    delete bi;
405
406    status=U_ZERO_ERROR;
407    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
408    if (U_FAILURE(status) || bi == NULL)  {
409        errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
410    }
411    delete bi;
412
413    status=U_ZERO_ERROR;
414    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
415    if (U_FAILURE(status) || bi == NULL)  {
416        errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
417        return;   // Skip the rest of these tests.
418    }
419
420
421    UnicodeString testString="0123456789";
422    bi->setText(testString);
423
424    int32_t i;
425    i = bi->first();
426    if (i != 0) {
427        errln("Incorrect value from bi->first().  Expected 0, got %d.", i);
428    }
429
430    i = bi->last();
431    if (i != 10) {
432        errln("Incorrect value from bi->last().  Expected 10, got %d", i);
433    }
434
435    //
436    // Previous
437    //
438    bi->last();
439    i = bi->previous();
440    if (i != 9) {
441        errln("Incorrect value from bi->last() at line %d.  Expected 9, got %d", __LINE__, i);
442    }
443
444
445    bi->first();
446    i = bi->previous();
447    if (i != BreakIterator::DONE) {
448        errln("Incorrect value from bi->previous() at line %d.  Expected DONE, got %d", __LINE__, i);
449    }
450
451    //
452    // next()
453    //
454    bi->first();
455    i = bi->next();
456    if (i != 1) {
457        errln("Incorrect value from bi->next() at line %d.  Expected 1, got %d", __LINE__, i);
458    }
459
460    bi->last();
461    i = bi->next();
462    if (i != BreakIterator::DONE) {
463        errln("Incorrect value from bi->next() at line %d.  Expected DONE, got %d", __LINE__, i);
464    }
465
466
467    //
468    //  current()
469    //
470    bi->first();
471    i = bi->current();
472    if (i != 0) {
473        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
474    }
475
476    bi->next();
477    i = bi->current();
478    if (i != 1) {
479        errln("Incorrect value from bi->previous() at line %d.  Expected 1, got %d", __LINE__, i);
480    }
481
482    bi->last();
483    bi->next();
484    i = bi->current();
485    if (i != 10) {
486        errln("Incorrect value from bi->previous() at line %d.  Expected 10, got %d", __LINE__, i);
487    }
488
489    bi->first();
490    bi->previous();
491    i = bi->current();
492    if (i != 0) {
493        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
494    }
495
496
497    //
498    // Following()
499    //
500    i = bi->following(4);
501    if (i != 5) {
502        errln("Incorrect value from bi->following() at line %d.  Expected 5, got %d", __LINE__, i);
503    }
504
505    i = bi->following(9);
506    if (i != 10) {
507        errln("Incorrect value from bi->following() at line %d.  Expected 10, got %d", __LINE__, i);
508    }
509
510    i = bi->following(10);
511    if (i != BreakIterator::DONE) {
512        errln("Incorrect value from bi->following() at line %d.  Expected DONE, got %d", __LINE__, i);
513    }
514
515
516    //
517    // Preceding
518    //
519    i = bi->preceding(4);
520    if (i != 3) {
521        errln("Incorrect value from bi->preceding() at line %d.  Expected 3, got %d", __LINE__, i);
522    }
523
524    i = bi->preceding(10);
525    if (i != 9) {
526        errln("Incorrect value from bi->preceding() at line %d.  Expected 9, got %d", __LINE__, i);
527    }
528
529    i = bi->preceding(1);
530    if (i != 0) {
531        errln("Incorrect value from bi->preceding() at line %d.  Expected 0, got %d", __LINE__, i);
532    }
533
534    i = bi->preceding(0);
535    if (i != BreakIterator::DONE) {
536        errln("Incorrect value from bi->preceding() at line %d.  Expected DONE, got %d", __LINE__, i);
537    }
538
539
540    //
541    // isBoundary()
542    //
543    bi->first();
544    if (bi->isBoundary(3) != TRUE) {
545        errln("Incorrect value from bi->isBoudary() at line %d.  Expected TRUE, got FALSE", __LINE__, i);
546    }
547    i = bi->current();
548    if (i != 3) {
549        errln("Incorrect value from bi->current() at line %d.  Expected 3, got %d", __LINE__, i);
550    }
551
552
553    if (bi->isBoundary(11) != FALSE) {
554        errln("Incorrect value from bi->isBoudary() at line %d.  Expected FALSE, got TRUE", __LINE__, i);
555    }
556    i = bi->current();
557    if (i != 10) {
558        errln("Incorrect value from bi->current() at line %d.  Expected 10, got %d", __LINE__, i);
559    }
560
561    //
562    // next(n)
563    //
564    bi->first();
565    i = bi->next(4);
566    if (i != 4) {
567        errln("Incorrect value from bi->next() at line %d.  Expected 4, got %d", __LINE__, i);
568    }
569
570    i = bi->next(6);
571    if (i != 10) {
572        errln("Incorrect value from bi->next() at line %d.  Expected 10, got %d", __LINE__, i);
573    }
574
575    bi->first();
576    i = bi->next(11);
577    if (i != BreakIterator::DONE) {
578        errln("Incorrect value from bi->next() at line %d.  Expected BreakIterator::DONE, got %d", __LINE__, i);
579    }
580
581    delete bi;
582
583}
584
585
586
587
588
589
590void RBBIAPITest::TestBuilder() {
591     UnicodeString rulesString1 = "$Letters = [:L:];\n"
592                                  "$Numbers = [:N:];\n"
593                                  "$Letters+;\n"
594                                  "$Numbers+;\n"
595                                  "[^$Letters $Numbers];\n"
596                                  "!.*;\n";
597     UnicodeString testString1  = "abc123..abc";
598                                // 01234567890
599     int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
600     UErrorCode status=U_ZERO_ERROR;
601     UParseError    parseError;
602
603     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
604     if(U_FAILURE(status)) {
605         dataerrln("Fail : in construction - %s", u_errorName(status));
606     } else {
607         bi->setText(testString1);
608         doBoundaryTest(*bi, testString1, bounds1);
609     }
610     delete bi;
611}
612
613
614//
615//  TestQuoteGrouping
616//       Single quotes within rules imply a grouping, so that a modifier
617//       following the quoted text (* or +) applies to all of the quoted chars.
618//
619void RBBIAPITest::TestQuoteGrouping() {
620     UnicodeString rulesString1 = "#Here comes the rule...\n"
621                                  "'$@!'*;\n"   //  (\$\@\!)*
622                                  ".;\n";
623
624     UnicodeString testString1  = "$@!$@!X$@!!X";
625                                // 0123456789012
626     int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
627     UErrorCode status=U_ZERO_ERROR;
628     UParseError    parseError;
629
630     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
631     if(U_FAILURE(status)) {
632         dataerrln("Fail : in construction - %s", u_errorName(status));
633     } else {
634         bi->setText(testString1);
635         doBoundaryTest(*bi, testString1, bounds1);
636     }
637     delete bi;
638}
639
640//
641//  TestRuleStatus
642//      Test word break rule status constants.
643//
644void RBBIAPITest::TestRuleStatus() {
645     UChar str[30];
646     u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
647              // 012345678901234567  8      9    0  1      2    3  4      5    6
648              //                    Ideographic    Katakana       Hiragana
649                str, 30);
650     UnicodeString testString1(str);
651     int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26};
652     int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
653                          UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
654                          UBRK_WORD_IDEO,     UBRK_WORD_IDEO,   UBRK_WORD_NONE,
655                          UBRK_WORD_KANA,     UBRK_WORD_NONE,   UBRK_WORD_KANA,    UBRK_WORD_KANA};
656
657     int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
658                          UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
659                          UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT,   UBRK_WORD_NONE_LIMIT,
660                          UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT,   UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT};
661
662     UErrorCode status=U_ZERO_ERROR;
663
664     RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
665     if(U_FAILURE(status)) {
666         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
667     } else {
668         bi->setText(testString1);
669         // First test that the breaks are in the right spots.
670         doBoundaryTest(*bi, testString1, bounds1);
671
672         // Then go back and check tag values
673         int32_t i = 0;
674         int32_t pos, tag;
675         for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
676             if (pos != bounds1[i]) {
677                 errln("FAIL: unexpected word break at postion %d", pos);
678                 break;
679             }
680             tag = bi->getRuleStatus();
681             if (tag < tag_lo[i] || tag >= tag_hi[i]) {
682                 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
683                 break;
684             }
685
686             // Check that we get the same tag values from getRuleStatusVec()
687             int32_t vec[10];
688             int t = bi->getRuleStatusVec(vec, 10, status);
689             TEST_ASSERT_SUCCESS(status);
690             TEST_ASSERT(t==1);
691             TEST_ASSERT(vec[0] == tag);
692         }
693     }
694     delete bi;
695
696     // Now test line break status.  This test mostly is to confirm that the status constants
697     //                              are correctly declared in the header.
698     testString1 =   "test line. \n";
699     // break type    s    s     h
700
701     bi = (RuleBasedBreakIterator *)
702         BreakIterator::createLineInstance(Locale::getEnglish(), status);
703     if(U_FAILURE(status)) {
704         errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
705     } else {
706         int32_t i = 0;
707         int32_t pos, tag;
708         UBool   success;
709
710         bi->setText(testString1);
711         pos = bi->current();
712         tag = bi->getRuleStatus();
713         for (i=0; i<3; i++) {
714             switch (i) {
715             case 0:
716                 success = pos==0  && tag==UBRK_LINE_SOFT; break;
717             case 1:
718                 success = pos==5  && tag==UBRK_LINE_SOFT; break;
719             case 2:
720                 success = pos==12 && tag==UBRK_LINE_HARD; break;
721             default:
722                 success = FALSE; break;
723             }
724             if (success == FALSE) {
725                 errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
726                     i, pos, tag);
727                 break;
728             }
729             pos = bi->next();
730             tag = bi->getRuleStatus();
731         }
732         if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
733             UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
734             UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) {
735             errln("UBRK_LINE_* constants from header are inconsistent.");
736         }
737     }
738     delete bi;
739
740}
741
742
743//
744//  TestRuleStatusVec
745//      Test the vector form of  break rule status.
746//
747void RBBIAPITest::TestRuleStatusVec() {
748    UnicodeString rulesString(   "[A-N]{100}; \n"
749                                 "[a-w]{200}; \n"
750                                 "[\\p{L}]{300}; \n"
751                                 "[\\p{N}]{400}; \n"
752                                 "[0-5]{500}; \n"
753                                  "!.*;\n", -1, US_INV);
754     UnicodeString testString1  = "Aapz5?";
755     int32_t  statusVals[10];
756     int32_t  numStatuses;
757     int32_t  pos;
758
759     UErrorCode status=U_ZERO_ERROR;
760     UParseError    parseError;
761
762     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
763     if (U_FAILURE(status)) {
764         dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
765     } else {
766         bi->setText(testString1);
767
768         // A
769         pos = bi->next();
770         TEST_ASSERT(pos==1);
771         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
772         TEST_ASSERT_SUCCESS(status);
773         TEST_ASSERT(numStatuses == 2);
774         TEST_ASSERT(statusVals[0] == 100);
775         TEST_ASSERT(statusVals[1] == 300);
776
777         // a
778         pos = bi->next();
779         TEST_ASSERT(pos==2);
780         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
781         TEST_ASSERT_SUCCESS(status);
782         TEST_ASSERT(numStatuses == 2);
783         TEST_ASSERT(statusVals[0] == 200);
784         TEST_ASSERT(statusVals[1] == 300);
785
786         // p
787         pos = bi->next();
788         TEST_ASSERT(pos==3);
789         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
790         TEST_ASSERT_SUCCESS(status);
791         TEST_ASSERT(numStatuses == 2);
792         TEST_ASSERT(statusVals[0] == 200);
793         TEST_ASSERT(statusVals[1] == 300);
794
795         // z
796         pos = bi->next();
797         TEST_ASSERT(pos==4);
798         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
799         TEST_ASSERT_SUCCESS(status);
800         TEST_ASSERT(numStatuses == 1);
801         TEST_ASSERT(statusVals[0] == 300);
802
803         // 5
804         pos = bi->next();
805         TEST_ASSERT(pos==5);
806         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
807         TEST_ASSERT_SUCCESS(status);
808         TEST_ASSERT(numStatuses == 2);
809         TEST_ASSERT(statusVals[0] == 400);
810         TEST_ASSERT(statusVals[1] == 500);
811
812         // ?
813         pos = bi->next();
814         TEST_ASSERT(pos==6);
815         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
816         TEST_ASSERT_SUCCESS(status);
817         TEST_ASSERT(numStatuses == 1);
818         TEST_ASSERT(statusVals[0] == 0);
819
820         //
821         //  Check buffer overflow error handling.   Char == A
822         //
823         bi->first();
824         pos = bi->next();
825         TEST_ASSERT(pos==1);
826         memset(statusVals, -1, sizeof(statusVals));
827         numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
828         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
829         TEST_ASSERT(numStatuses == 2);
830         TEST_ASSERT(statusVals[0] == -1);
831
832         status = U_ZERO_ERROR;
833         memset(statusVals, -1, sizeof(statusVals));
834         numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
835         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
836         TEST_ASSERT(numStatuses == 2);
837         TEST_ASSERT(statusVals[0] == 100);
838         TEST_ASSERT(statusVals[1] == -1);
839
840         status = U_ZERO_ERROR;
841         memset(statusVals, -1, sizeof(statusVals));
842         numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
843         TEST_ASSERT_SUCCESS(status);
844         TEST_ASSERT(numStatuses == 2);
845         TEST_ASSERT(statusVals[0] == 100);
846         TEST_ASSERT(statusVals[1] == 300);
847         TEST_ASSERT(statusVals[2] == -1);
848     }
849     delete bi;
850
851}
852
853//
854//   Bug 2190 Regression test.   Builder crash on rule consisting of only a
855//                               $variable reference
856void RBBIAPITest::TestBug2190() {
857     UnicodeString rulesString1 = "$aaa = abcd;\n"
858                                  "$bbb = $aaa;\n"
859                                  "$bbb;\n";
860     UnicodeString testString1  = "abcdabcd";
861                                // 01234567890
862     int32_t bounds1[] = {0, 4, 8};
863     UErrorCode status=U_ZERO_ERROR;
864     UParseError    parseError;
865
866     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
867     if(U_FAILURE(status)) {
868         dataerrln("Fail : in construction - %s", u_errorName(status));
869     } else {
870         bi->setText(testString1);
871         doBoundaryTest(*bi, testString1, bounds1);
872     }
873     delete bi;
874}
875
876
877void RBBIAPITest::TestRegistration() {
878#if !UCONFIG_NO_SERVICE
879    UErrorCode status = U_ZERO_ERROR;
880    BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
881
882    // ok to not delete these if we exit because of error?
883    BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
884    BreakIterator* root_word = BreakIterator::createWordInstance("", status);
885    BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
886
887    if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
888        dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
889        delete ja_word;
890        delete ja_char;
891        delete root_word;
892        delete root_char;
893
894        return;
895    }
896
897    URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
898    {
899        if (ja_word && *ja_word == *root_word) {
900            errln("japan not different from root");
901        }
902    }
903
904    {
905        BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
906        UBool fail = TRUE;
907        if(result){
908            fail = *result != *ja_word;
909        }
910        delete result;
911        if (fail) {
912            errln("bad result for xx_XX/word");
913        }
914    }
915
916    {
917        BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
918        UBool fail = TRUE;
919        if(result){
920            fail = *result != *ja_char;
921        }
922        delete result;
923        if (fail) {
924            errln("bad result for ja_JP/char");
925        }
926    }
927
928    {
929        BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
930        UBool fail = TRUE;
931        if(result){
932            fail = *result != *root_char;
933        }
934        delete result;
935        if (fail) {
936            errln("bad result for xx_XX/char");
937        }
938    }
939
940    {
941        StringEnumeration* avail = BreakIterator::getAvailableLocales();
942        UBool found = FALSE;
943        const UnicodeString* p;
944        while ((p = avail->snext(status))) {
945            if (p->compare("xx") == 0) {
946                found = TRUE;
947                break;
948            }
949        }
950        delete avail;
951        if (!found) {
952            errln("did not find test locale");
953        }
954    }
955
956    {
957        UBool unreg = BreakIterator::unregister(key, status);
958        if (!unreg) {
959            errln("unable to unregister");
960        }
961    }
962
963    {
964        BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
965        BreakIterator* root = BreakIterator::createWordInstance("", status);
966        UBool fail = TRUE;
967        if(root){
968          fail = *root != *result;
969        }
970        delete root;
971        delete result;
972        if (fail) {
973            errln("did not get root break");
974        }
975    }
976
977    {
978        StringEnumeration* avail = BreakIterator::getAvailableLocales();
979        UBool found = FALSE;
980        const UnicodeString* p;
981        while ((p = avail->snext(status))) {
982            if (p->compare("xx") == 0) {
983                found = TRUE;
984                break;
985            }
986        }
987        delete avail;
988        if (found) {
989            errln("found test locale");
990        }
991    }
992
993    {
994        int32_t count;
995        UBool   foundLocale = FALSE;
996        const Locale *avail = BreakIterator::getAvailableLocales(count);
997        for (int i=0; i<count; i++) {
998            if (avail[i] == Locale::getEnglish()) {
999                foundLocale = TRUE;
1000                break;
1001            }
1002        }
1003        if (foundLocale == FALSE) {
1004            errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1005        }
1006    }
1007
1008
1009    // ja_word was adopted by factory
1010    delete ja_char;
1011    delete root_word;
1012    delete root_char;
1013#endif
1014}
1015
1016void RBBIAPITest::RoundtripRule(const char *dataFile) {
1017    UErrorCode status = U_ZERO_ERROR;
1018    UParseError parseError;
1019    parseError.line = 0;
1020    parseError.offset = 0;
1021    UDataMemory *data = udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status);
1022    uint32_t length;
1023    const UChar *builtSource;
1024    const uint8_t *rbbiRules;
1025    const uint8_t *builtRules;
1026
1027    if (U_FAILURE(status)) {
1028        errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status));
1029        return;
1030    }
1031
1032    builtRules = (const uint8_t *)udata_getMemory(data);
1033    builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1034    RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1035    if (U_FAILURE(status)) {
1036        errln("createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1037                u_errorName(status), parseError.line, parseError.offset);
1038        return;
1039    };
1040    rbbiRules = brkItr->getBinaryRules(length);
1041    logln("Comparing \"%s\" len=%d", dataFile, length);
1042    if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1043        errln("Built rules and rebuilt rules are different %s", dataFile);
1044        return;
1045    }
1046    delete brkItr;
1047    udata_close(data);
1048}
1049
1050void RBBIAPITest::TestRoundtripRules() {
1051    RoundtripRule("word");
1052    RoundtripRule("title");
1053    RoundtripRule("sent");
1054    RoundtripRule("line");
1055    RoundtripRule("char");
1056    if (!quick) {
1057        RoundtripRule("word_ja");
1058        RoundtripRule("word_POSIX");
1059    }
1060}
1061
1062// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1063// (these are protected so we access them via a local class RBBIWithProtectedFunctions).
1064// This is just a sanity check, not a thorough test (e.g. we don't check that the
1065// first delete actually frees rulesCopy).
1066void RBBIAPITest::TestCreateFromRBBIData() {
1067    // Get some handy RBBIData
1068    const char *brkName = "word"; // or "sent", "line", "char", etc.
1069    UErrorCode status = U_ZERO_ERROR;
1070    UDataMemory * data = udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status);
1071    if ( U_SUCCESS(status) ) {
1072        const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data);
1073        uint32_t length = builtRules->fLength;
1074        RBBIWithProtectedFunctions * brkItr;
1075
1076        // Try the memory-adopting constructor, need to copy the data first
1077        RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
1078        if ( rulesCopy ) {
1079            uprv_memcpy( rulesCopy, builtRules, length );
1080
1081            brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
1082            if ( U_SUCCESS(status) ) {
1083                delete brkItr; // this should free rulesCopy
1084            } else {
1085                errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
1086                status = U_ZERO_ERROR;// reset for the next test
1087                uprv_free( rulesCopy );
1088            }
1089        }
1090
1091        // Now try the non-adopting constructor
1092        brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
1093        if ( U_SUCCESS(status) ) {
1094            delete brkItr; // this should NOT attempt to free builtRules
1095            if (builtRules->fLength != length) { // sanity check
1096                errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
1097            }
1098        } else {
1099            errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
1100        }
1101
1102        udata_close(data);
1103    }
1104}
1105
1106//---------------------------------------------
1107// runIndexedTest
1108//---------------------------------------------
1109
1110void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1111{
1112    if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1113    switch (index) {
1114     //   case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1115        case  0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1116        case  1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1117        case  2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1118        case  3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1119        case  4: name = "TestIteration"; if (exec) TestIteration(); break;
1120        case  5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1121        case  6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
1122        case  7: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1123        case  8: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1124        case  9: name = "TestBug2190"; if (exec) TestBug2190(); break;
1125        case 10: name = "TestRegistration"; if (exec) TestRegistration(); break;
1126        case 11: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1127        case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1128        case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
1129
1130        default: name = ""; break; // needed to end loop
1131    }
1132}
1133
1134//---------------------------------------------
1135//Internal subroutines
1136//---------------------------------------------
1137
1138void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1139     logln((UnicodeString)"testIsBoundary():");
1140        int32_t p = 0;
1141        UBool isB;
1142        for (int32_t i = 0; i < text.length(); i++) {
1143            isB = bi.isBoundary(i);
1144            logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1145
1146            if (i == boundaries[p]) {
1147                if (!isB)
1148                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1149                p++;
1150            }
1151            else {
1152                if (isB)
1153                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1154            }
1155        }
1156}
1157void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1158    UnicodeString selected;
1159    UnicodeString expected=CharsToUnicodeString(expectedString);
1160
1161    if(gotoffset != expectedOffset)
1162         errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1163    if(start <= gotoffset){
1164        testString.extractBetween(start, gotoffset, selected);
1165    }
1166    else{
1167        testString.extractBetween(gotoffset, start, selected);
1168    }
1169    if(selected.compare(expected) != 0)
1170         errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1171    else
1172        logln(prettify("****selected \"" + selected + "\""));
1173}
1174
1175//---------------------------------------------
1176//RBBIWithProtectedFunctions class functions
1177//---------------------------------------------
1178
1179RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
1180    : RuleBasedBreakIterator(data, status)
1181{
1182}
1183
1184RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
1185    : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
1186{
1187}
1188
1189#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1190