rbbiapts.cpp revision b13da9df870a61b11249bf741347908dbea0edd8
1/********************************************************************
2 * Copyright (c) 1999-2007, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 *   Date        Name        Description
6 *   12/14/99    Madhu        Creation.
7 *   01/12/2000  Madhu        updated for changed API
8 ********************************************************************/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/uchar.h"
15#include "intltest.h"
16#include "unicode/rbbi.h"
17#include "unicode/schriter.h"
18#include "rbbiapts.h"
19#include "rbbidata.h"
20#include "cstring.h"
21#include "ubrkimpl.h"
22#include "unicode/ustring.h"
23#include "unicode/utext.h"
24
25/**
26 * API Test the RuleBasedBreakIterator class
27 */
28
29
// Reports a test failure through errln(), with source file, line and the ICU
// error name, when `status` holds a failure code.
// Wrapped in do { } while (0) so that the macro expands to exactly one
// statement and can be used safely in an unbraced if/else.
#define TEST_ASSERT_SUCCESS(status) do {if (U_FAILURE(status)) {\
    errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} while (0)

// Reports a test failure through errln(), with source file and line, when
// `expr` evaluates to false.  Same single-statement wrapping as above.
#define TEST_ASSERT(expr) do {if (!(expr)) { \
    errln("Test Failure at file %s, line %d", __FILE__, __LINE__);}} while (0)
35
36void RBBIAPITest::TestCloneEquals()
37{
38
39    UErrorCode status=U_ZERO_ERROR;
40    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
41    RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
42    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
43    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
44    if(U_FAILURE(status)){
45        errln((UnicodeString)"FAIL : in construction");
46        return;
47    }
48
49
50    UnicodeString testString="Testing word break iterators's clone() and equals()";
51    bi1->setText(testString);
52    bi2->setText(testString);
53    biequal->setText(testString);
54
55    bi3->setText("hello");
56
57    logln((UnicodeString)"Testing equals()");
58
59    logln((UnicodeString)"Testing == and !=");
60    UBool b = (*bi1 != *biequal);
61    b |= *bi1 == *bi2;
62    b |= *bi1 == *bi3;
63    if (b) {
64        errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
65    }
66
67    if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
68        errln((UnicodeString)"ERROR:2 RBBI's == and != operator  failed.");
69
70
71    // Quick test of RulesBasedBreakIterator assignment -
72    // Check that
73    //    two different iterators are !=
74    //    they are == after assignment
75    //    source and dest iterator produce the same next() after assignment.
76    //    deleting one doesn't disable the other.
77    logln("Testing assignment");
78    RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
79    if(U_FAILURE(status)){
80        errln((UnicodeString)"FAIL : in construction");
81        return;
82    }
83
84    RuleBasedBreakIterator biDefault, biDefault2;
85    if(U_FAILURE(status)){
86        errln((UnicodeString)"FAIL : in construction of default iterator");
87        return;
88    }
89    if (biDefault == *bix) {
90        errln((UnicodeString)"ERROR: iterators should not compare ==");
91        return;
92    }
93    if (biDefault != biDefault2) {
94        errln((UnicodeString)"ERROR: iterators should compare ==");
95        return;
96    }
97
98
99    UnicodeString   HelloString("Hello Kitty");
100    bix->setText(HelloString);
101    if (*bix == *bi2) {
102        errln(UnicodeString("ERROR: strings should not be equal before assignment."));
103    }
104    *bix = *bi2;
105    if (*bix != *bi2) {
106        errln(UnicodeString("ERROR: strings should be equal before assignment."));
107    }
108
109    int bixnext = bix->next();
110    int bi2next = bi2->next();
111    if (! (bixnext == bi2next && bixnext == 7)) {
112        errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
113    }
114    delete bix;
115    if (bi2->next() != 8) {
116        errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
117    }
118
119
120
121    logln((UnicodeString)"Testing clone()");
122    RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
123    RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
124
125    if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
126      *bi1clone == *bi3 || *bi1clone == *bi2)
127        errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
128
129    if(*bi2clone == *bi1 || *bi2clone == *biequal ||
130       *bi2clone == *bi3 || *bi2clone != *bi2)
131        errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
132
133    if(bi1->getText() != bi1clone->getText()   ||
134       bi2clone->getText() != bi2->getText()   ||
135       *bi2clone == *bi1clone )
136        errln((UnicodeString)"ERROR: RBBI's clone() method failed");
137
138    delete bi1clone;
139    delete bi2clone;
140    delete bi1;
141    delete bi3;
142    delete bi2;
143    delete biequal;
144}
145
146void RBBIAPITest::TestBoilerPlate()
147{
148    UErrorCode status = U_ZERO_ERROR;
149    BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
150    BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
151    if (U_FAILURE(status)) {
152        errln("Creation of break iterator failed %s", u_errorName(status));
153        return;
154    }
155    if(*a!=*b){
156        errln("Failed: boilerplate method operator!= does not return correct results");
157    }
158    BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status);
159    if(a && c){
160        if(*c==*a){
161            errln("Failed: boilerplate method opertator== does not return correct results");
162        }
163    }else{
164        errln("creation of break iterator failed");
165    }
166    delete a;
167    delete b;
168    delete c;
169}
170
171void RBBIAPITest::TestgetRules()
172{
173    UErrorCode status=U_ZERO_ERROR;
174
175    RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
176    RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
177    if(U_FAILURE(status)){
178        errln((UnicodeString)"FAIL: in construction");
179        delete bi1;
180        delete bi2;
181        return;
182    }
183
184
185
186    logln((UnicodeString)"Testing toString()");
187
188    bi1->setText((UnicodeString)"Hello there");
189
190    RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
191
192    UnicodeString temp=bi1->getRules();
193    UnicodeString temp2=bi2->getRules();
194    UnicodeString temp3=bi3->getRules();
195    if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
196        errln((UnicodeString)"ERROR: error in getRules() method");
197
198    delete bi1;
199    delete bi2;
200    delete bi3;
201}
202void RBBIAPITest::TestHashCode()
203{
204    UErrorCode status=U_ZERO_ERROR;
205    RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
206    RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
207    RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
208    if(U_FAILURE(status)){
209        errln((UnicodeString)"FAIL : in construction");
210        delete bi1;
211        delete bi2;
212        delete bi3;
213        return;
214    }
215
216
217    logln((UnicodeString)"Testing hashCode()");
218
219    bi1->setText((UnicodeString)"Hash code");
220    bi2->setText((UnicodeString)"Hash code");
221    bi3->setText((UnicodeString)"Hash code");
222
223    RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
224    RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
225
226    if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
227        bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
228        errln((UnicodeString)"ERROR: identical objects have different hashcodes");
229
230    if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
231        bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
232        errln((UnicodeString)"ERROR: different objects have same hashcodes");
233
234    delete bi1clone;
235    delete bi2clone;
236    delete bi1;
237    delete bi2;
238    delete bi3;
239
240}
241void RBBIAPITest::TestGetSetAdoptText()
242{
243    logln((UnicodeString)"Testing getText setText ");
244    UErrorCode status=U_ZERO_ERROR;
245    UnicodeString str1="first string.";
246    UnicodeString str2="Second string.";
247    RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
248    RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
249    if(U_FAILURE(status)){
250        errln((UnicodeString)"FAIL : in construction");
251            return;
252    }
253
254
255    CharacterIterator* text1= new StringCharacterIterator(str1);
256    CharacterIterator* text1Clone = text1->clone();
257    CharacterIterator* text2= new StringCharacterIterator(str2);
258    CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
259
260    wordIter1->setText(str1);
261    CharacterIterator *tci = &wordIter1->getText();
262    UnicodeString      tstr;
263    tci->getText(tstr);
264    TEST_ASSERT(tstr == str1);
265    if(wordIter1->current() != 0)
266        errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
267
268    wordIter1->next(2);
269
270    wordIter1->setText(str2);
271    if(wordIter1->current() != 0)
272        errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
273
274
275    charIter1->adoptText(text1Clone);
276    TEST_ASSERT(wordIter1->getText() != charIter1->getText());
277    tci = &wordIter1->getText();
278    tci->getText(tstr);
279    TEST_ASSERT(tstr == str2);
280    tci = &charIter1->getText();
281    tci->getText(tstr);
282    TEST_ASSERT(tstr == str1);
283
284
285    RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone();
286    rb->adoptText(text1);
287    if(rb->getText() != *text1)
288        errln((UnicodeString)"ERROR:1 error in adoptText ");
289    rb->adoptText(text2);
290    if(rb->getText() != *text2)
291        errln((UnicodeString)"ERROR:2 error in adoptText ");
292
293    // Adopt where iterator range is less than the entire orignal source string.
294    //   (With the change of the break engine to working with UText internally,
295    //    CharacterIterators starting at positions other than zero are not supported)
296    rb->adoptText(text3);
297    TEST_ASSERT(rb->preceding(2) == 0);
298    TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
299    //if(rb->preceding(2) != 3) {
300    //    errln((UnicodeString)"ERROR:3 error in adoptText ");
301    //}
302    //if(rb->following(11) != BreakIterator::DONE) {
303    //    errln((UnicodeString)"ERROR:4 error in adoptText ");
304    //}
305
306    // UText API
307    //
308    //   Quick test to see if UText is working at all.
309    //
310    const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
311    const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
312    //                012345678901
313
314    status = U_ZERO_ERROR;
315    UText *ut = utext_openUTF8(NULL, s1, -1, &status);
316    wordIter1->setText(ut, status);
317    TEST_ASSERT_SUCCESS(status);
318
319    int32_t pos;
320    pos = wordIter1->first();
321    TEST_ASSERT(pos==0);
322    pos = wordIter1->next();
323    TEST_ASSERT(pos==5);
324    pos = wordIter1->next();
325    TEST_ASSERT(pos==6);
326    pos = wordIter1->next();
327    TEST_ASSERT(pos==11);
328    pos = wordIter1->next();
329    TEST_ASSERT(pos==UBRK_DONE);
330
331    status = U_ZERO_ERROR;
332    UText *ut2 = utext_openUTF8(NULL, s2, -1, &status);
333    TEST_ASSERT_SUCCESS(status);
334    wordIter1->setText(ut2, status);
335    TEST_ASSERT_SUCCESS(status);
336
337    pos = wordIter1->first();
338    TEST_ASSERT(pos==0);
339    pos = wordIter1->next();
340    TEST_ASSERT(pos==3);
341    pos = wordIter1->next();
342    TEST_ASSERT(pos==4);
343
344    pos = wordIter1->last();
345    TEST_ASSERT(pos==6);
346    pos = wordIter1->previous();
347    TEST_ASSERT(pos==4);
348    pos = wordIter1->previous();
349    TEST_ASSERT(pos==3);
350    pos = wordIter1->previous();
351    TEST_ASSERT(pos==0);
352    pos = wordIter1->previous();
353    TEST_ASSERT(pos==UBRK_DONE);
354
355    status = U_ZERO_ERROR;
356    UnicodeString sEmpty;
357    UText *gut2 = utext_openUnicodeString(NULL, &sEmpty, &status);
358    wordIter1->getUText(gut2, status);
359    TEST_ASSERT_SUCCESS(status);
360    utext_close(gut2);
361
362    utext_close(ut);
363    utext_close(ut2);
364
365    delete wordIter1;
366    delete charIter1;
367    delete rb;
368
369 }
370
371
372void RBBIAPITest::TestIteration()
373{
374    // This test just verifies that the API is present.
375    // Testing for correct operation of the break rules happens elsewhere.
376
377    UErrorCode status=U_ZERO_ERROR;
378    RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
379    if (U_FAILURE(status) || bi == NULL)  {
380        errln("Failure creating character break iterator.  Status = %s", u_errorName(status));
381    }
382    delete bi;
383
384    status=U_ZERO_ERROR;
385    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
386    if (U_FAILURE(status) || bi == NULL)  {
387        errln("Failure creating Word break iterator.  Status = %s", u_errorName(status));
388    }
389    delete bi;
390
391    status=U_ZERO_ERROR;
392    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
393    if (U_FAILURE(status) || bi == NULL)  {
394        errln("Failure creating Line break iterator.  Status = %s", u_errorName(status));
395    }
396    delete bi;
397
398    status=U_ZERO_ERROR;
399    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
400    if (U_FAILURE(status) || bi == NULL)  {
401        errln("Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
402    }
403    delete bi;
404
405    status=U_ZERO_ERROR;
406    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
407    if (U_FAILURE(status) || bi == NULL)  {
408        errln("Failure creating Title break iterator.  Status = %s", u_errorName(status));
409    }
410    delete bi;
411
412    status=U_ZERO_ERROR;
413    bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
414    if (U_FAILURE(status) || bi == NULL)  {
415        errln("Failure creating character break iterator.  Status = %s", u_errorName(status));
416        return;   // Skip the rest of these tests.
417    }
418
419
420    UnicodeString testString="0123456789";
421    bi->setText(testString);
422
423    int32_t i;
424    i = bi->first();
425    if (i != 0) {
426        errln("Incorrect value from bi->first().  Expected 0, got %d.", i);
427    }
428
429    i = bi->last();
430    if (i != 10) {
431        errln("Incorrect value from bi->last().  Expected 10, got %d", i);
432    }
433
434    //
435    // Previous
436    //
437    bi->last();
438    i = bi->previous();
439    if (i != 9) {
440        errln("Incorrect value from bi->last() at line %d.  Expected 9, got %d", __LINE__, i);
441    }
442
443
444    bi->first();
445    i = bi->previous();
446    if (i != BreakIterator::DONE) {
447        errln("Incorrect value from bi->previous() at line %d.  Expected DONE, got %d", __LINE__, i);
448    }
449
450    //
451    // next()
452    //
453    bi->first();
454    i = bi->next();
455    if (i != 1) {
456        errln("Incorrect value from bi->next() at line %d.  Expected 1, got %d", __LINE__, i);
457    }
458
459    bi->last();
460    i = bi->next();
461    if (i != BreakIterator::DONE) {
462        errln("Incorrect value from bi->next() at line %d.  Expected DONE, got %d", __LINE__, i);
463    }
464
465
466    //
467    //  current()
468    //
469    bi->first();
470    i = bi->current();
471    if (i != 0) {
472        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
473    }
474
475    bi->next();
476    i = bi->current();
477    if (i != 1) {
478        errln("Incorrect value from bi->previous() at line %d.  Expected 1, got %d", __LINE__, i);
479    }
480
481    bi->last();
482    bi->next();
483    i = bi->current();
484    if (i != 10) {
485        errln("Incorrect value from bi->previous() at line %d.  Expected 10, got %d", __LINE__, i);
486    }
487
488    bi->first();
489    bi->previous();
490    i = bi->current();
491    if (i != 0) {
492        errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
493    }
494
495
496    //
497    // Following()
498    //
499    i = bi->following(4);
500    if (i != 5) {
501        errln("Incorrect value from bi->following() at line %d.  Expected 5, got %d", __LINE__, i);
502    }
503
504    i = bi->following(9);
505    if (i != 10) {
506        errln("Incorrect value from bi->following() at line %d.  Expected 10, got %d", __LINE__, i);
507    }
508
509    i = bi->following(10);
510    if (i != BreakIterator::DONE) {
511        errln("Incorrect value from bi->following() at line %d.  Expected DONE, got %d", __LINE__, i);
512    }
513
514
515    //
516    // Preceding
517    //
518    i = bi->preceding(4);
519    if (i != 3) {
520        errln("Incorrect value from bi->preceding() at line %d.  Expected 3, got %d", __LINE__, i);
521    }
522
523    i = bi->preceding(10);
524    if (i != 9) {
525        errln("Incorrect value from bi->preceding() at line %d.  Expected 9, got %d", __LINE__, i);
526    }
527
528    i = bi->preceding(1);
529    if (i != 0) {
530        errln("Incorrect value from bi->preceding() at line %d.  Expected 0, got %d", __LINE__, i);
531    }
532
533    i = bi->preceding(0);
534    if (i != BreakIterator::DONE) {
535        errln("Incorrect value from bi->preceding() at line %d.  Expected DONE, got %d", __LINE__, i);
536    }
537
538
539    //
540    // isBoundary()
541    //
542    bi->first();
543    if (bi->isBoundary(3) != TRUE) {
544        errln("Incorrect value from bi->isBoudary() at line %d.  Expected TRUE, got FALSE", __LINE__, i);
545    }
546    i = bi->current();
547    if (i != 3) {
548        errln("Incorrect value from bi->current() at line %d.  Expected 3, got %d", __LINE__, i);
549    }
550
551
552    if (bi->isBoundary(11) != FALSE) {
553        errln("Incorrect value from bi->isBoudary() at line %d.  Expected FALSE, got TRUE", __LINE__, i);
554    }
555    i = bi->current();
556    if (i != 10) {
557        errln("Incorrect value from bi->current() at line %d.  Expected 10, got %d", __LINE__, i);
558    }
559
560    //
561    // next(n)
562    //
563    bi->first();
564    i = bi->next(4);
565    if (i != 4) {
566        errln("Incorrect value from bi->next() at line %d.  Expected 4, got %d", __LINE__, i);
567    }
568
569    i = bi->next(6);
570    if (i != 10) {
571        errln("Incorrect value from bi->next() at line %d.  Expected 10, got %d", __LINE__, i);
572    }
573
574    bi->first();
575    i = bi->next(11);
576    if (i != BreakIterator::DONE) {
577        errln("Incorrect value from bi->next() at line %d.  Expected BreakIterator::DONE, got %d", __LINE__, i);
578    }
579
580    delete bi;
581
582}
583
584
585
586
587
588
589void RBBIAPITest::TestBuilder() {
590     UnicodeString rulesString1 = "$Letters = [:L:];\n"
591                                  "$Numbers = [:N:];\n"
592                                  "$Letters+;\n"
593                                  "$Numbers+;\n"
594                                  "[^$Letters $Numbers];\n"
595                                  "!.*;\n";
596     UnicodeString testString1  = "abc123..abc";
597                                // 01234567890
598     int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
599     UErrorCode status=U_ZERO_ERROR;
600     UParseError    parseError;
601
602     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
603     if(U_FAILURE(status)) {
604         errln("FAIL : in construction");
605     } else {
606         bi->setText(testString1);
607         doBoundaryTest(*bi, testString1, bounds1);
608     }
609     delete bi;
610}
611
612
613//
614//  TestQuoteGrouping
615//       Single quotes within rules imply a grouping, so that a modifier
616//       following the quoted text (* or +) applies to all of the quoted chars.
617//
618void RBBIAPITest::TestQuoteGrouping() {
619     UnicodeString rulesString1 = "#Here comes the rule...\n"
620                                  "'$@!'*;\n"   //  (\$\@\!)*
621                                  ".;\n";
622
623     UnicodeString testString1  = "$@!$@!X$@!!X";
624                                // 0123456789012
625     int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
626     UErrorCode status=U_ZERO_ERROR;
627     UParseError    parseError;
628
629     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
630     if(U_FAILURE(status)) {
631         errln("FAIL : in construction");
632     } else {
633         bi->setText(testString1);
634         doBoundaryTest(*bi, testString1, bounds1);
635     }
636     delete bi;
637}
638
639//
640//  TestRuleStatus
641//      Test word break rule status constants.
642//
643void RBBIAPITest::TestRuleStatus() {
644     UChar str[30];
645     u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
646              // 012345678901234567  8      9    0  1      2    3  4      5    6
647              //                    Ideographic    Katakana       Hiragana
648                str, 30);
649     UnicodeString testString1(str);
650     int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26};
651     int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
652                          UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
653                          UBRK_WORD_IDEO,     UBRK_WORD_IDEO,   UBRK_WORD_NONE,
654                          UBRK_WORD_KANA,     UBRK_WORD_NONE,   UBRK_WORD_KANA,    UBRK_WORD_KANA};
655
656     int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
657                          UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
658                          UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT,   UBRK_WORD_NONE_LIMIT,
659                          UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT,   UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT};
660
661     UErrorCode status=U_ZERO_ERROR;
662
663     RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
664     if(U_FAILURE(status)) {
665         errln("FAIL : in construction");
666     } else {
667         bi->setText(testString1);
668         // First test that the breaks are in the right spots.
669         doBoundaryTest(*bi, testString1, bounds1);
670
671         // Then go back and check tag values
672         int32_t i = 0;
673         int32_t pos, tag;
674         for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
675             if (pos != bounds1[i]) {
676                 errln("FAIL: unexpected word break at postion %d", pos);
677                 break;
678             }
679             tag = bi->getRuleStatus();
680             if (tag < tag_lo[i] || tag >= tag_hi[i]) {
681                 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
682                 break;
683             }
684
685             // Check that we get the same tag values from getRuleStatusVec()
686             int32_t vec[10];
687             int t = bi->getRuleStatusVec(vec, 10, status);
688             TEST_ASSERT_SUCCESS(status);
689             TEST_ASSERT(t==1);
690             TEST_ASSERT(vec[0] == tag);
691         }
692     }
693     delete bi;
694
695     // Now test line break status.  This test mostly is to confirm that the status constants
696     //                              are correctly declared in the header.
697     testString1 =   "test line. \n";
698     // break type    s    s     h
699
700     bi = (RuleBasedBreakIterator *)
701         BreakIterator::createLineInstance(Locale::getEnglish(), status);
702     if(U_FAILURE(status)) {
703         errln("failed to create word break iterator.");
704     } else {
705         int32_t i = 0;
706         int32_t pos, tag;
707         UBool   success;
708
709         bi->setText(testString1);
710         pos = bi->current();
711         tag = bi->getRuleStatus();
712         for (i=0; i<3; i++) {
713             switch (i) {
714             case 0:
715                 success = pos==0  && tag==UBRK_LINE_SOFT; break;
716             case 1:
717                 success = pos==5  && tag==UBRK_LINE_SOFT; break;
718             case 2:
719                 success = pos==12 && tag==UBRK_LINE_HARD; break;
720             default:
721                 success = FALSE; break;
722             }
723             if (success == FALSE) {
724                 errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
725                     i, pos, tag);
726                 break;
727             }
728             pos = bi->next();
729             tag = bi->getRuleStatus();
730         }
731         if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
732             UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
733             UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) {
734             errln("UBRK_LINE_* constants from header are inconsistent.");
735         }
736     }
737     delete bi;
738
739}
740
741
742//
743//  TestRuleStatusVec
744//      Test the vector form of  break rule status.
745//
746void RBBIAPITest::TestRuleStatusVec() {
747    UnicodeString rulesString  = "[A-N]{100}; \n"
748                                 "[a-w]{200}; \n"
749                                 "[\\p{L}]{300}; \n"
750                                 "[\\p{N}]{400}; \n"
751                                 "[0-5]{500}; \n"
752                                  "!.*;\n";
753     UnicodeString testString1  = "Aapz5?";
754     int32_t  statusVals[10];
755     int32_t  numStatuses;
756     int32_t  pos;
757
758     UErrorCode status=U_ZERO_ERROR;
759     UParseError    parseError;
760
761     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
762     TEST_ASSERT_SUCCESS(status);
763     if (U_SUCCESS(status)) {
764         bi->setText(testString1);
765
766         // A
767         pos = bi->next();
768         TEST_ASSERT(pos==1);
769         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
770         TEST_ASSERT_SUCCESS(status);
771         TEST_ASSERT(numStatuses == 2);
772         TEST_ASSERT(statusVals[0] == 100);
773         TEST_ASSERT(statusVals[1] == 300);
774
775         // a
776         pos = bi->next();
777         TEST_ASSERT(pos==2);
778         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
779         TEST_ASSERT_SUCCESS(status);
780         TEST_ASSERT(numStatuses == 2);
781         TEST_ASSERT(statusVals[0] == 200);
782         TEST_ASSERT(statusVals[1] == 300);
783
784         // p
785         pos = bi->next();
786         TEST_ASSERT(pos==3);
787         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
788         TEST_ASSERT_SUCCESS(status);
789         TEST_ASSERT(numStatuses == 2);
790         TEST_ASSERT(statusVals[0] == 200);
791         TEST_ASSERT(statusVals[1] == 300);
792
793         // z
794         pos = bi->next();
795         TEST_ASSERT(pos==4);
796         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
797         TEST_ASSERT_SUCCESS(status);
798         TEST_ASSERT(numStatuses == 1);
799         TEST_ASSERT(statusVals[0] == 300);
800
801         // 5
802         pos = bi->next();
803         TEST_ASSERT(pos==5);
804         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
805         TEST_ASSERT_SUCCESS(status);
806         TEST_ASSERT(numStatuses == 2);
807         TEST_ASSERT(statusVals[0] == 400);
808         TEST_ASSERT(statusVals[1] == 500);
809
810         // ?
811         pos = bi->next();
812         TEST_ASSERT(pos==6);
813         numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
814         TEST_ASSERT_SUCCESS(status);
815         TEST_ASSERT(numStatuses == 1);
816         TEST_ASSERT(statusVals[0] == 0);
817
818         //
819         //  Check buffer overflow error handling.   Char == A
820         //
821         bi->first();
822         pos = bi->next();
823         TEST_ASSERT(pos==1);
824         memset(statusVals, -1, sizeof(statusVals));
825         numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
826         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
827         TEST_ASSERT(numStatuses == 2);
828         TEST_ASSERT(statusVals[0] == -1);
829
830         status = U_ZERO_ERROR;
831         memset(statusVals, -1, sizeof(statusVals));
832         numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
833         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
834         TEST_ASSERT(numStatuses == 2);
835         TEST_ASSERT(statusVals[0] == 100);
836         TEST_ASSERT(statusVals[1] == -1);
837
838         status = U_ZERO_ERROR;
839         memset(statusVals, -1, sizeof(statusVals));
840         numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
841         TEST_ASSERT_SUCCESS(status);
842         TEST_ASSERT(numStatuses == 2);
843         TEST_ASSERT(statusVals[0] == 100);
844         TEST_ASSERT(statusVals[1] == 300);
845         TEST_ASSERT(statusVals[2] == -1);
846     }
847     delete bi;
848
849}
850
851//
852//   Bug 2190 Regression test.   Builder crash on rule consisting of only a
853//                               $variable reference
854void RBBIAPITest::TestBug2190() {
855     UnicodeString rulesString1 = "$aaa = abcd;\n"
856                                  "$bbb = $aaa;\n"
857                                  "$bbb;\n";
858     UnicodeString testString1  = "abcdabcd";
859                                // 01234567890
860     int32_t bounds1[] = {0, 4, 8};
861     UErrorCode status=U_ZERO_ERROR;
862     UParseError    parseError;
863
864     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
865     if(U_FAILURE(status)) {
866         errln("FAIL : in construction");
867     } else {
868         bi->setText(testString1);
869         doBoundaryTest(*bi, testString1, bounds1);
870     }
871     delete bi;
872}
873
874
875void RBBIAPITest::TestRegistration() {
876#if !UCONFIG_NO_SERVICE
877    UErrorCode status = U_ZERO_ERROR;
878    BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
879
880    // ok to not delete these if we exit because of error?
881    BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
882    BreakIterator* root_word = BreakIterator::createWordInstance("", status);
883    BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
884
885    URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
886    {
887        if (ja_word && *ja_word == *root_word) {
888            errln("japan not different from root");
889        }
890    }
891
892    {
893        BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
894        UBool fail = TRUE;
895        if(result){
896            fail = *result != *ja_word;
897        }
898        delete result;
899        if (fail) {
900            errln("bad result for xx_XX/word");
901        }
902    }
903
904    {
905        BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
906        UBool fail = TRUE;
907        if(result){
908            fail = *result != *ja_char;
909        }
910        delete result;
911        if (fail) {
912            errln("bad result for ja_JP/char");
913        }
914    }
915
916    {
917        BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
918        UBool fail = TRUE;
919        if(result){
920            fail = *result != *root_char;
921        }
922        delete result;
923        if (fail) {
924            errln("bad result for xx_XX/char");
925        }
926    }
927
928    {
929        StringEnumeration* avail = BreakIterator::getAvailableLocales();
930        UBool found = FALSE;
931        const UnicodeString* p;
932        while ((p = avail->snext(status))) {
933            if (p->compare("xx") == 0) {
934                found = TRUE;
935                break;
936            }
937        }
938        delete avail;
939        if (!found) {
940            errln("did not find test locale");
941        }
942    }
943
944    {
945        UBool unreg = BreakIterator::unregister(key, status);
946        if (!unreg) {
947            errln("unable to unregister");
948        }
949    }
950
951    {
952        BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
953        BreakIterator* root = BreakIterator::createWordInstance("", status);
954        UBool fail = TRUE;
955        if(root){
956          fail = *root != *result;
957        }
958        delete root;
959        delete result;
960        if (fail) {
961            errln("did not get root break");
962        }
963    }
964
965    {
966        StringEnumeration* avail = BreakIterator::getAvailableLocales();
967        UBool found = FALSE;
968        const UnicodeString* p;
969        while ((p = avail->snext(status))) {
970            if (p->compare("xx") == 0) {
971                found = TRUE;
972                break;
973            }
974        }
975        delete avail;
976        if (found) {
977            errln("found test locale");
978        }
979    }
980
981    {
982        int32_t count;
983        UBool   foundLocale = FALSE;
984        const Locale *avail = BreakIterator::getAvailableLocales(count);
985        for (int i=0; i<count; i++) {
986            if (avail[i] == Locale::getEnglish()) {
987                foundLocale = TRUE;
988                break;
989            }
990        }
991        if (foundLocale == FALSE) {
992            errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
993        }
994    }
995
996
997    // ja_word was adopted by factory
998    delete ja_char;
999    delete root_word;
1000    delete root_char;
1001#endif
1002}
1003
1004void RBBIAPITest::RoundtripRule(const char *dataFile) {
1005    UErrorCode status = U_ZERO_ERROR;
1006    UParseError parseError;
1007    parseError.line = 0;
1008    parseError.offset = 0;
1009    UDataMemory *data = udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status);
1010    uint32_t length;
1011    const UChar *builtSource;
1012    const uint8_t *rbbiRules;
1013    const uint8_t *builtRules;
1014
1015    if (U_FAILURE(status)) {
1016        errln("Can't open \"%s\"", dataFile);
1017        return;
1018    }
1019
1020    builtRules = (const uint8_t *)udata_getMemory(data);
1021    builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1022    RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1023    if (U_FAILURE(status)) {
1024        errln("createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1025                u_errorName(status), parseError.line, parseError.offset);
1026        return;
1027    };
1028    rbbiRules = brkItr->getBinaryRules(length);
1029    logln("Comparing \"%s\" len=%d", dataFile, length);
1030    if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1031        errln("Built rules and rebuilt rules are different %s", dataFile);
1032        return;
1033    }
1034    delete brkItr;
1035    udata_close(data);
1036}
1037
1038void RBBIAPITest::TestRoundtripRules() {
1039    RoundtripRule("word");
1040    RoundtripRule("title");
1041    RoundtripRule("sent");
1042    RoundtripRule("line");
1043    RoundtripRule("char");
1044    if (!quick) {
1045        RoundtripRule("word_ja");
1046        RoundtripRule("word_POSIX");
1047    }
1048}
1049
1050//---------------------------------------------
1051// runIndexedTest
1052//---------------------------------------------
1053
1054void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1055{
1056    if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1057    switch (index) {
1058     //   case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1059        case  0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1060        case  1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1061        case  2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1062        case  3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1063        case  4: name = "TestIteration"; if (exec) TestIteration(); break;
1064        case  5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1065        case  6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
1066        case  7: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1067        case  8: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1068        case  9: name = "TestBug2190"; if (exec) TestBug2190(); break;
1069        case 10: name = "TestRegistration"; if (exec) TestRegistration(); break;
1070        case 11: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1071        case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1072
1073        default: name = ""; break; // needed to end loop
1074    }
1075}
1076
1077//---------------------------------------------
1078//Internal subroutines
1079//---------------------------------------------
1080
1081void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1082     logln((UnicodeString)"testIsBoundary():");
1083        int32_t p = 0;
1084        UBool isB;
1085        for (int32_t i = 0; i < text.length(); i++) {
1086            isB = bi.isBoundary(i);
1087            logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1088
1089            if (i == boundaries[p]) {
1090                if (!isB)
1091                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1092                p++;
1093            }
1094            else {
1095                if (isB)
1096                    errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1097            }
1098        }
1099}
1100void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1101    UnicodeString selected;
1102    UnicodeString expected=CharsToUnicodeString(expectedString);
1103
1104    if(gotoffset != expectedOffset)
1105         errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1106    if(start <= gotoffset){
1107        testString.extractBetween(start, gotoffset, selected);
1108    }
1109    else{
1110        testString.extractBetween(gotoffset, start, selected);
1111    }
1112    if(selected.compare(expected) != 0)
1113         errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1114    else
1115        logln(prettify("****selected \"" + selected + "\""));
1116}
1117
1118#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1119