1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/********************************************************************
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * COPYRIGHT:
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (c) 1999-2010, International Business Machines Corporation and
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * others. All Rights Reserved.
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ********************************************************************/
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/************************************************************************
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Date        Name        Description
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   12/15/99    Madhu        Creation.
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   01/12/2000  Madhu        Updated for changed API and added new tests
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)************************************************************************/
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypeinfo.h"  // for 'typeid' to work
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_BREAK_ITERATION
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/brkiter.h"
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/rbbi.h"
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchar.h"
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utf16.h"
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucnv.h"
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/schriter.h"
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uniset.h"
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/regex.h"        // TODO: make conditional on regexp being built.
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ustring.h"
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utext.h"
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "intltest.h"
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "rbbitst.h"
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <string.h>
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvector.h"
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvectr32.h"
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "triedict.h"
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <string.h>
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <stdio.h>
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <stdlib.h>
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/numfmt.h"
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uscript.h"
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// TEST_ASSERT(x)
//   Evaluates the condition x once; when it is false, reports a failure through
//   errln(), tagged with the source file and line of the macro invocation.
//   Expands to a brace-enclosed block and calls errln() unqualified, so errln()
//   must be callable at the point of use.
#define TEST_ASSERT(x) { \
    if (!(x)) { \
        errln("Failure in file %s, line %d", __FILE__, __LINE__); \
    } \
}

// TEST_ASSERT_SUCCESS(errcode)
//   When errcode satisfies U_FAILURE(), reports the failure through errcheckln(),
//   tagged with the source file, the line of the macro invocation and the
//   u_errorName() text for errcode.  Note that errcode is expanded more than once.
#define TEST_ASSERT_SUCCESS(errcode) { \
    if (U_FAILURE(errcode)) { \
        errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", \
                   __FILE__, __LINE__, u_errorName(errcode)); \
    } \
}
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// runIndexedTest
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* params )
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (exec) logln("TestSuite RuleBasedBreakIterator: ");
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    switch (index) {
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_FILE_IO
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 0: name = "TestBug4153072";
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestBug4153072();                         break;
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 0: name = "skip";
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 1: name = "TestJapaneseLineBreak";
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestJapaneseLineBreak();                  break;
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 2: name = "TestStatusReturn";
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestStatusReturn();                       break;
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_FILE_IO
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 3: name = "TestUnicodeFiles";
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestUnicodeFiles();                       break;
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 4: name = "TestEmptyString";
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestEmptyString();                        break;
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 3: case 4: name = "skip";
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 5: name = "TestGetAvailableLocales";
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestGetAvailableLocales();                break;
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 6: name = "TestGetDisplayName";
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestGetDisplayName();                     break;
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_FILE_IO
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 7: name = "TestEndBehaviour";
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestEndBehaviour();                       break;
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 8: name = "TestMixedThaiLineBreak";
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             if(exec) TestMixedThaiLineBreak();                break;
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 9: name = "TestThaiLineBreak";
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             if(exec) TestThaiLineBreak();                     break;
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 10: name = "TestMaiyamok";
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             if(exec) TestMaiyamok();                          break;
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 11: name = "TestWordBreaks";
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             if(exec) TestWordBreaks();                        break;
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 12: name = "TestWordBoundary";
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             if(exec) TestWordBoundary();                      break;
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 13: name = "TestLineBreaks";
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             if(exec) TestLineBreaks();                        break;
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 14: name = "TestSentBreaks";
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             if(exec) TestSentBreaks();                        break;
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 15: name = "TestExtended";
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             if(exec) TestExtended();                          break;
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 7: case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15: name = "skip";
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             break;
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 16:
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             if(exec) {
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               name = "TestMonkey";
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               TestMonkey(params);
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #else
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               name = "skip";
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #endif
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             }
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                               break;
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_FILE_IO
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 17: name = "TestBug3818";
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestBug3818();                            break;
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 18: name = "TestJapaneseWordBreak";
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestJapaneseWordBreak();                  break;
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 17: case 18: name = "skip";
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 19: name = "TestDebug";
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestDebug();                              break;
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 20: name = "TestTrieDict";
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestTrieDict();                           break;
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_FILE_IO
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 21: name = "TestBug5775";
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (exec) TestBug5775();                           break;
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 22: name = "TestThaiBreaks";
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (exec) TestThaiBreaks();                        break;
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 23: name = "TestTailoredBreaks";
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (exec) TestTailoredBreaks();                    break;
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 24: name = "TestTrieDictWithValue";
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(exec) TestTrieDictWithValue();                  break;
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 21: case 22: case 23: case 24: name = "skip";
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 25: name = "TestDictRules";
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (exec) TestDictRules();                         break;
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 25: name = "TestBug5532";
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (exec) TestBug5532();                           break;
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        default: name = ""; break; //needed to end loop
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   class BITestData   Holds a set of Break iterator test data and results
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                      Includes
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                         - the string data to be broken
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                         - a vector of the expected break positions.
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                         - a vector of source line numbers for the data,
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                               (to help see where errors occured.)
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                         - The expected break tag values.
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                         - Vectors of actual break positions and tag values.
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                         - Functions for comparing actual with expected and
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                            reporting errors.
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class BITestData {
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString    fDataToBreak;
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector          fExpectedBreakPositions;
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector          fExpectedTags;
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector          fLineNum;
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector          fActualBreakPositions;   // Test Results.
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector          fActualTags;
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BITestData(UErrorCode &status);
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void             addDataChunk(const char *data, int32_t tag, int32_t lineNum, UErrorCode status);
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void             checkResults(const char *heading, RBBITest *test);
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void             err(const char *heading, RBBITest *test, int32_t expectedIdx, int32_t actualIdx);
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void             clearResults();
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Constructor.
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)BITestData::BITestData(UErrorCode &status)
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles): fExpectedBreakPositions(status), fExpectedTags(status),  fLineNum(status), fActualBreakPositions(status),
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  fActualTags(status)
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// addDataChunk.   Add a section (non-breaking) piece if data to the test data.
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                 The macro form collects the line number, which is helpful
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                 when tracking down failures.
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                 A null data item is inserted at the start of each test's data
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                  to put the starting zero into the data list.  The position saved for
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                  each non-null item is its ending position.
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define ADD_DATACHUNK(td, data, tag, status)   td.addDataChunk(data, tag, __LINE__, status);
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void BITestData::addDataChunk(const char *data, int32_t tag, int32_t lineNum, UErrorCode status) {
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {return;}
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (data != NULL) {
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fDataToBreak.append(CharsToUnicodeString(data));
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fExpectedBreakPositions.addElement(fDataToBreak.length(), status);
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fExpectedTags.addElement(tag, status);
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLineNum.addElement(lineNum, status);
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  checkResults.   Compare the actual and expected break positions, report any differences.
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void BITestData::checkResults(const char *heading, RBBITest *test) {
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t   expectedIndex = 0;
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t   actualIndex = 0;
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (;;) {
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // If we've run through both the expected and actual results vectors, we're done.
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //   break out of the loop.
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (expectedIndex >= fExpectedBreakPositions.size() &&
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            actualIndex   >= fActualBreakPositions.size()) {
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (expectedIndex >= fExpectedBreakPositions.size()) {
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            err(heading, test, expectedIndex-1, actualIndex);
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            actualIndex++;
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (actualIndex >= fActualBreakPositions.size()) {
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            err(heading, test, expectedIndex, actualIndex-1);
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expectedIndex++;
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fActualBreakPositions.elementAti(actualIndex) != fExpectedBreakPositions.elementAti(expectedIndex)) {
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            err(heading, test, expectedIndex, actualIndex);
248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Try to resync the positions of the indices, to avoid a rash of spurious erros.
249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fActualBreakPositions.elementAti(actualIndex) < fExpectedBreakPositions.elementAti(expectedIndex)) {
250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                actualIndex++;
251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                expectedIndex++;
253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fActualTags.elementAti(actualIndex) != fExpectedTags.elementAti(expectedIndex)) {
258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            test->errln("%s, tag mismatch.  Test Line = %d, expected tag=%d, got %d",
259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                heading, fLineNum.elementAt(expectedIndex),
260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fExpectedTags.elementAti(expectedIndex), fActualTags.elementAti(actualIndex));
261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        actualIndex++;
264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectedIndex++;
265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  err   -  An error was found.  Report it, along with information about where the
270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                                incorrectly broken test data appeared in the source file.
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void    BITestData::err(const char *heading, RBBITest *test, int32_t expectedIdx, int32_t actualIdx)
273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t   expected = fExpectedBreakPositions.elementAti(expectedIdx);
275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t   actual   = fActualBreakPositions.elementAti(actualIdx);
276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t   o        = 0;
277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t   line     = fLineNum.elementAti(expectedIdx);
278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (expectedIdx > 0) {
279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // The line numbers are off by one because a premature break occurs somewhere
280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    within the previous item, rather than at the start of the current (expected) item.
281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    We want to report the offset of the unexpected break from the start of
282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //      this previous item.
283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        o    = actual - fExpectedBreakPositions.elementAti(expectedIdx-1);
284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (actual < expected) {
286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        test->errln("%s unexpected break at offset %d in test item from line %d. actual break: %d  expected break: %d", heading, o, line, actual, expected);
287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        test->errln("%s Failed to find break at end of item from line %d. actual break: %d  expected break: %d", heading, line, actual, expected);
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void BITestData::clearResults() {
294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fActualBreakPositions.removeAllElements();
295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fActualTags.removeAllElements();
296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------------
300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    Cannned Test Characters
302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------------
304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Zero-terminated table of UTF-16 code units used as canned test input.
// It samples control characters, ASCII punctuation/digits/letters, Latin-1
// symbols, spacing modifier letters, combining marks, Hebrew, Devanagari,
// Tibetan, General Punctuation (spaces, joiners, directional marks, line and
// paragraph separators), enclosing marks and Roman numerals.
// The trailing 0x0000 is the terminator consumed by the UnicodeString
// constructor in RBBITest::RBBITest(); it is not part of the data.
static const UChar cannedTestArray[] = {
    0x0001, 0x0002, 0x0003, 0x0004, 0x0020, 0x0021, '\\', 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028, 0x0029, 0x002b, 0x002d, 0x0030, 0x0031,
    0x0032, 0x0033, 0x0034, 0x003c, 0x003d, 0x003e, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x005b, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x007b,
    0x007d, 0x007c, 0x002c, 0x00a0, 0x00a2,
    0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00ab, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b2, 0x00b3,
    0x00b4, 0x00b9, 0x00bb, 0x00bc, 0x00bd, 0x02b0, 0x02b1, 0x02b2, 0x02b3, 0x02b4, 0x0300, 0x0301, 0x0302, 0x0303,
    0x0304, 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x0903, 0x093e, 0x093f, 0x0940, 0x0949, 0x0f3a, 0x0f3b, 0x2000,
    0x2001, 0x2002, 0x200c, 0x200d, 0x200e, 0x200f, 0x2010, 0x2011, 0x2012, 0x2028, 0x2029, 0x202a, 0x203e, 0x203f,
    0x2040, 0x20dd, 0x20de, 0x20df, 0x20e0, 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x0000
};
315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// File-scope string holding U+0000 followed by the contents of cannedTestArray.
// It is allocated by the RBBITest constructor and deleted by the RBBITest
// destructor, so it is only valid while an RBBITest object exists.
static UnicodeString* cannedTestChars = 0;
317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Devanagari conjunct building blocks. Each literal holds backslash-u escape
// text (the backslash is doubled, so the C string contains the characters
// '\', 'u', ...), not the code points themselves.
//   half form = consonant + virama (U+094D) + zero width joiner (U+200D)
//   dead form = consonant + virama (U+094D)
#define  halfNA     "\\u0928\\u094d\\u200d"
#define  halfSA     "\\u0938\\u094d\\u200d"
#define  halfCHA    "\\u091a\\u094d\\u200d"
#define  halfKA     "\\u0915\\u094d\\u200d"
#define  deadTA     "\\u0924\\u094d"
323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------------
325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    RBBITest    constructor and destructor
327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------------
329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBITest::RBBITest() {
331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString temp(cannedTestArray);
332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    cannedTestChars = new UnicodeString();
333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *cannedTestChars += (UChar)0x0000;
334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *cannedTestChars += temp;
335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBITest::~RBBITest() {
339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete cannedTestChars;
340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Tag constants for break-status checks.
// NOTE(review): the values coincide with ICU's UBRK_WORD_NUMBER (100),
// UBRK_WORD_LETTER (200), UBRK_WORD_KANA (300) and UBRK_WORD_IDEO (400) word
// rule-status tags in ubrk.h; presumably they are the expected tags passed to
// ADD_DATACHUNK elsewhere in this file -- confirm against the callers.
static const int T_NUMBER = 100;
static const int T_LETTER = 200;
static const int T_H_OR_K = 300;
static const int T_IDEO   = 400;
347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------
354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//Testing the BreakIterator for devanagari script
355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------
356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// More Devanagari building blocks, again spelled as backslash-u escape text.
// A "dead" consonant is the consonant followed by the virama (U+094D).
#define deadRA   "\\u0930\\u094d"         /*deadform RA = devanagari RA + virama*/
#define deadPHA  "\\u092b\\u094d"         /*deadform PHA = devanagari PHA + virama*/
#define deadTTHA "\\u0920\\u094d"         /*deadform TTHA = devanagari TTHA + virama*/
#define deadPA   "\\u092a\\u094d"         /*deadform PA = devanagari PA + virama*/
#define deadSA   "\\u0938\\u094d"         /*deadform SA = devanagari SA + virama*/
#define visarga  "\\u0903"                /*devanagari visarga looks like an English colon*/
363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------------
370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   Test for status {tag} return value from break rules.
372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//        TODO:  a more thorough test.
373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------------
375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestStatusReturn() {
376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     UnicodeString rulesString1("$Letters = [:L:];\n"
377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  "$Numbers = [:N:];\n"
378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  "$Letters+{1};\n"
379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  "$Numbers+{2};\n"
380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  "Help\\ {4}/me\\!;\n"
381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  "[^$Letters $Numbers];\n"
382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  "!.*;\n", -1, US_INV);
383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     UnicodeString testString1  = "abc123..abc Help me Help me!";
384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                // 01234567890123456789012345678
385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     int32_t bounds1[]   = {0, 3, 6, 7, 8, 11, 12, 16, 17, 19, 20, 25, 27, 28, -1};
386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     int32_t brkStatus[] = {0, 1, 2, 0, 0,  1,  0,  1,  0,  1,  0,  4,  1,  0, -1};
387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     UErrorCode status=U_ZERO_ERROR;
389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     UParseError    parseError;
390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     if(U_FAILURE(status)) {
393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         dataerrln("FAIL : in construction - %s", u_errorName(status));
394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     } else {
395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         int32_t  pos;
396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         int32_t  i = 0;
397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         bi->setText(testString1);
398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         for (pos=bi->first(); pos!= BreakIterator::DONE; pos=bi->next()) {
399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             if (pos != bounds1[i]) {
400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 errln("FAIL:  expected break at %d, got %d\n", bounds1[i], pos);
401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 break;
402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             }
403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             int tag = bi->getRuleStatus();
405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             if (tag != brkStatus[i]) {
406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 errln("FAIL:  break at %d, expected tag %d, got tag %d\n", pos, brkStatus[i], tag);
407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 break;
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             }
409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             i++;
410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         }
411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     }
412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     delete bi;
413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void printStringBreaks(UnicodeString ustr, int expected[],
417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              int expectedcount)
418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char name[100];
421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("code    alpha extend alphanum type word sent line name\n");
422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int j;
423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (j = 0; j < ustr.length(); j ++) {
424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (expectedcount > 0) {
425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int k;
426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for (k = 0; k < expectedcount; k ++) {
427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (j == expected[k]) {
428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    printf("------------------------------------------------ %d\n",
429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                           j);
430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c = ustr.char32At(j);
434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (c > 0xffff) {
435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            j ++;
436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status);
438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c,
439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                           u_isUAlphabetic(c),
440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                           u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND),
441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                           u_isalnum(c),
442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                           u_getPropertyValueName(UCHAR_GENERAL_CATEGORY,
443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                  u_charType(c),
444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                  U_SHORT_PROPERTY_NAME),
445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                           u_getPropertyValueName(UCHAR_WORD_BREAK,
446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                  u_getIntPropertyValue(c,
447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                          UCHAR_WORD_BREAK),
448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                  U_SHORT_PROPERTY_NAME),
449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                           u_getPropertyValueName(UCHAR_SENTENCE_BREAK,
450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                   u_getIntPropertyValue(c,
451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                           UCHAR_SENTENCE_BREAK),
452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                   U_SHORT_PROPERTY_NAME),
453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                           u_getPropertyValueName(UCHAR_LINE_BREAK,
454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                   u_getIntPropertyValue(c,
455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                           UCHAR_LINE_BREAK),
456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                   U_SHORT_PROPERTY_NAME),
457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                           name);
458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestThaiLineBreak() {
462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BITestData thaiLineSelection(status);
464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // \u0e2f-- the Thai paiyannoi character-- isn't a letter.  It's a symbol that
466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // represents elided letters at the end of a long word.  It should be bound to
467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the end of the word and not treated as an independent punctuation mark.
468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, NULL, 0, status);           // Break at start of data
471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e2a\\u0e16\\u0e32\\u0e19\\u0e35\\u0e2f", 0, status);
472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e08\\u0e30", 0, status);
473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e14\\u0e21", 0, status);
474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e08\\u0e49\\u0e32", 0, status);
475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//        ADD_DATACHUNK(thaiLineSelection, "\\u0e2b\\u0e19\\u0e49\\u0e32", 0, status);
476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//        ADD_DATACHUNK(thaiLineSelection, "\\u0e17\\u0e35\\u0e48", 0, status);
477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48", 0, status);
478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the commented-out lines (I think) are the preferred result; this line is what our current dictionary is giving us
479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e2d\\u0e2d\\u0e01", 0, status);
480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e21\\u0e32", 0, status);
481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e23\\u0e48\\u0e07", 0, status);
482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e1a\\u0e32\\u0e22", 0, status);
483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e2d\\u0e22\\u0e48\\u0e32\\u0e07", 0, status);
484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e15\\u0e47\\u0e21", 0, status);
485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the one time where the paiyannoi occurs somewhere other than at the end
487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // of a word is in the Thai abbrevation for "etc.", which both begins and
488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // ends with a paiyannoi
489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e2f\\u0e25\\u0e2f", 0, status);
490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e17\\u0e35\\u0e48", 0, status);
491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e19\\u0e31\\u0e49\\u0e19", 0, status);
492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(
494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        Locale("th"), status);
495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status))
496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Failed to create the BreakIterator for Thai locale in TestThaiLineBreak. - %s", u_errorName(status));
498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    generalIteratorTest(*e, thaiLineSelection);
502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete e;
503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestMixedThaiLineBreak()
508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode   status = U_ZERO_ERROR;
510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BITestData   thaiLineSelection(status);
511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, NULL, 0, status);           // Break at start of data
513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // @suwit -- Test Arabic numerals, Thai numerals, Punctuation and English characters
516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // start
517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E1B\\u0E35", 0, status);
519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E1E\\u0E38\\u0E17\\u0E18\\u0E28\\u0E31\\u0E01\\u0E23\\u0E32\\u0E0A ", 0, status);
520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "2545 ", 0, status);
521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E40\\u0E1B\\u0E47\\u0E19", 0, status);
522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E1B\\u0E35", 0, status);
523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E09\\u0E25\\u0E2D\\u0E07", 0, status);
524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E04\\u0E23\\u0E1A", 0, status);
525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E23\\u0E2D\\u0E1A ", 0, status);
526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\"\\u0E52\\u0E52\\u0E50 ", 0, status);
527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E1b\\u0E35\" ", 0, status);
528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E02\\u0E2d\\u0E07", 0, status);
529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E01\\u0E23\\u0E38\\u0E07", 0, status);
530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E23\\u0E31\\u0E15\\u0E19\\u0E42\\u0E01\\u0E2A\\u0E34\\u0E19\\u0E17\\u0E23\\u0E4C ", 0, status);
531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "(\\u0E01\\u0E23\\u0E38\\u0E07\\u0E40\\u0E17\\u0E1e\\u0E2F", 0, status);
532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0E2B\\u0E23\\u0E37\\u0E2D ", 0, status);
533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "Bangkok)", 0, status);
534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // @suwit - end of changes
536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale("th"), status);
539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status))
540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Failed to create the BreakIterator for Thai locale in TestMixedThaiLineBreak. - %s", u_errorName(status));
542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    generalIteratorTest(*e, thaiLineSelection);
547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete e;
548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestMaiyamok()
552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BITestData   thaiLineSelection(status);
555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, NULL, 0, status);           // Break at start of data
556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the Thai maiyamok character is a shorthand symbol that means "repeat the previous
557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // word".  Instead of appearing as a word unto itself, however, it's kept together
558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // with the word before it
559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e44\\u0e1b\\u0e46", 0, status);
560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e21\\u0e32\\u0e46", 0, status);
561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e2b\\u0e27\\u0e48\\u0e32\\u0e07", 0, status);
562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e01\\u0e23\\u0e38\\u0e07", 0, status);
563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e17\\u0e1e", 0, status);
564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e41\\u0e25\\u0e30", 0, status);
565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e03\\u0e35", 0, status);
566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e22\\u0e07", 0, status);
567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(thaiLineSelection, "\\u0e43\\u0e2b\\u0e21\\u0e48", 0, status);
568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(
570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        Locale("th"), status);
571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status))
573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Failed to create the BreakIterator for Thai locale in TestMaiyamok. - %s", u_errorName(status));
575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    generalIteratorTest(*e, thaiLineSelection);
578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete e;
579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestBug3818() {
584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Four Thai words...
587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar thaiWordData[] = {  0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48,
588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                           0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48, 0 };
589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString  thaiStr(thaiWordData);
590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator* bi =
592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale("th"), status);
593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status) || bi == NULL) {
594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Fail at file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi->setText(thaiStr);
598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t  startOfSecondWord = bi->following(1);
600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (startOfSecondWord != 4) {
601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Fail at file %s, line %d expected start of word at 4, got %d",
602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            __FILE__, __LINE__, startOfSecondWord);
603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    startOfSecondWord = bi->following(0);
605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (startOfSecondWord != 4) {
606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Fail at file %s, line %d expected start of word at 4, got %d",
607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            __FILE__, __LINE__, startOfSecondWord);
608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestJapaneseWordBreak() {
614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TODO: Rewrite this test for a dictionary-based word breaking.
615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0
616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BITestData   japaneseWordSelection(status);
618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(japaneseWordSelection, NULL, 0, status);           // Break at start of data
620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(japaneseWordSelection, "\\u4ECA\\u65E5", 400, status); //2
621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(japaneseWordSelection, "\\u306F\\u3044\\u3044", 300, status); //5
622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(japaneseWordSelection, "\\u5929\\u6C17", 400, status); //7
623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(japaneseWordSelection, "\\u3067\\u3059\\u306D", 300, status); //10
624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(japaneseWordSelection, "\\u3002", 0, status); //11
625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(japaneseWordSelection, "\\u000D\\u000A", 0, status); //12
626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(
628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        Locale("ja"), status);
629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status))
630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Failed to create the BreakIterator for Japanese locale in TestJapaneseWordBreak.\n");
632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    generalIteratorTest(*e, japaneseWordSelection);
636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete e;
637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestTrieDict() {
641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode      status  = U_ZERO_ERROR;
642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Open and read the test data file.
645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *testDataDirectory = IntlTest::getSourceTestData(status);
647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char testFileName[1000];
648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (testDataDirectory == NULL || strlen(testDataDirectory) + strlen("riwords.txt") + 10 >= sizeof(testFileName)) {
649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Can't open test data.  Path too long.");
650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    strcpy(testFileName, testDataDirectory);
653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    strcat(testFileName, "riwords.txt");
654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Items needing deleting at the end
656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    MutableTrieDictionary *mutableDict = NULL;
657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CompactTrieDictionary *compactDict = NULL;
658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet            *breaks      = NULL;
659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar                 *testFile    = NULL;
660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    StringEnumeration     *enumer1     = NULL;
661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    StringEnumeration     *enumer2     = NULL;
662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    MutableTrieDictionary *mutable2    = NULL;
663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    StringEnumeration     *cloneEnum   = NULL;
664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CompactTrieDictionary *compact2    = NULL;
665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *originalWord = NULL;
668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *cloneWord    = NULL;
669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar *current;
670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar *word;
671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar uc;
672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t wordLen;
673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t wordCount;
674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t testCount;
675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int    len;
677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testFile = ReadAndConvertFile(testFileName, len, NULL, status);
678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup; /* something went wrong, error already output */
680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    mutableDict = new MutableTrieDictionary(0x0E1C, status);
683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Error creating MutableTrieDictionary: %s\n", u_errorName(status));
685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breaks = new UnicodeSet;
689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breaks->add(0x000A);     // Line Feed
690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breaks->add(0x000D);     // Carriage Return
691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breaks->add(0x2028);     // Line Separator
692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breaks->add(0x2029);     // Paragraph Separator
693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Now add each non-comment line of the file as a word.
695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    current = testFile;
696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    word = current;
697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uc = *current++;
698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    wordLen = 0;
699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    wordCount = 0;
700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (uc) {
702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (uc == 0x0023) {     // #comment line, skip
703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (uc && !breaks->contains(uc)) {
704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                uc = *current++;
705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else while (uc && !breaks->contains(uc)) {
708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ++wordLen;
709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            uc = *current++;
710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (wordLen > 0) {
712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            mutableDict->addWord(word, wordLen, status);
713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (U_FAILURE(status)) {
714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("Could not add word to mutable dictionary; status %s\n", u_errorName(status));
715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                goto cleanup;
716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            wordCount += 1;
718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Find beginning of next line
721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (uc && breaks->contains(uc)) {
722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            uc = *current++;
723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        word = current-1;
725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        wordLen = 0;
726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wordCount < 50) {
729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Word count (%d) unreasonably small\n", wordCount);
730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer1 = mutableDict->openWords(status);
734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not open mutable dictionary enumerator: %s\n", u_errorName(status));
736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testCount = 0;
740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wordCount != (testCount = enumer1->count(status))) {
741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("MutableTrieDictionary word count (%d) differs from file word count (%d), with status %s\n",
742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testCount, wordCount, u_errorName(status));
743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Now compact it
747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    compactDict = new CompactTrieDictionary(*mutableDict, status);
748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Failed to create CompactTrieDictionary: %s\n", u_errorName(status));
750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer2 = compactDict->openWords(status);
754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not open compact trie dictionary enumerator: %s\n", u_errorName(status));
756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wordCount != (testCount = enumer2->count(status))) {
760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("CompactTrieDictionary word count (%d) differs from file word count (%d), with status %s\n",
761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testCount, wordCount, u_errorName(status));
762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (typeid(*enumer1) == typeid(*enumer2)) {
766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("CompactTrieEnumeration and MutableTrieEnumeration typeids are the same");
767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete enumer1;
769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer1 = NULL;
770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete enumer2;
771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer2 = NULL;
772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Now un-compact it
774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    mutable2 = compactDict->cloneMutable(status);
775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not clone CompactTrieDictionary to MutableTrieDictionary: %s\n", u_errorName(status));
777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    cloneEnum = mutable2->openWords(status);
781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not create cloned mutable enumerator: %s\n", u_errorName(status));
783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wordCount != (testCount = cloneEnum->count(status))) {
787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Cloned MutableTrieDictionary word count (%d) differs from file word count (%d), with status %s\n",
788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testCount, wordCount, u_errorName(status));
789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Compact original dictionary to clone. Note that we can only compare the same kind of
793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // dictionary as the order of the enumerators is not guaranteed to be the same between
794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // different kinds
795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer1 = mutableDict->openWords(status);
796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not re-open mutable dictionary enumerator: %s\n", u_errorName(status));
798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     }
800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    originalWord = enumer1->snext(status);
802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    cloneWord = cloneEnum->snext(status);
803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (U_SUCCESS(status) && originalWord != NULL && cloneWord != NULL) {
804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (*originalWord != *cloneWord) {
805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Original and cloned MutableTrieDictionary word mismatch\n");
806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            goto cleanup;
807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        originalWord = enumer1->snext(status);
809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        cloneWord = cloneEnum->snext(status);
810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Enumeration failed: %s\n", u_errorName(status));
814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (originalWord != cloneWord) {
818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Original and cloned MutableTrieDictionary ended enumeration at different points\n");
819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Test the data copying constructor for CompactTrieDict, and the data access APIs.
823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    compact2 = new CompactTrieDictionary(compactDict->data(), status);
824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("CompactTrieDictionary(const void *,...) failed\n");
826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (compact2->dataSize() == 0) {
830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("CompactTrieDictionary->dataSize() == 0\n");
831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Now count the words via the second dictionary
835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete enumer1;
836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer1 = compact2->openWords(status);
837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not open compact trie dictionary 2 enumerator: %s\n", u_errorName(status));
839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wordCount != (testCount = enumer1->count(status))) {
843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("CompactTrieDictionary 2 word count (%d) differs from file word count (%d), with status %s\n",
844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testCount, wordCount, u_errorName(status));
845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)cleanup:
849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete compactDict;
850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete mutableDict;
851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete breaks;
852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete[] testFile;
853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete enumer1;
854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete mutable2;
855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete cloneEnum;
856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete compact2;
857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*TODO: delete later*/
860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)inline void writeEnumerationToFile(StringEnumeration *enumer, char *filename){
861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode      status  = U_ZERO_ERROR;
862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    FILE *outfile = fopen(filename,"w");
863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UConverter *cvt = ucnv_open("UTF-8", &status);
864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status))
865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(outfile != NULL){
867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_ZERO_ERROR;
868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        const UnicodeString *word = enumer->snext(status);
869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (word != NULL && U_SUCCESS(status)) {
870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            char u8word[500];
871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            status = U_ZERO_ERROR;
872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ucnv_fromUChars(cvt, u8word, 500, word->getBuffer(), word->length(),
873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    &status);
874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fprintf(outfile,"%s\n", u8word);
875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            status = U_ZERO_ERROR;
876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            word = enumer->snext(status);
877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fclose(outfile);
879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucnv_close(cvt);
881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// A very simple helper class to streamline the buffer handling in
// TestTrieDictWithValue
//
// Provides an array of at least 'size' elements of type T. Requests of up
// to N elements are served from storage embedded in the object itself;
// larger requests are allocated with new[] and released in the destructor.
//
// Element access through operator[] and elems() is unchecked.
//
// The class is deliberately non-copyable: 'buffer' either owns a heap array
// or points into this object's own stackBuffer, so a member-wise copy would
// lead to a double delete[] or to a pointer into another object's storage.
template<class T, size_t N>
class AutoBuffer {
 public:
  // Selects the embedded storage, switching to a heap array only when
  // 'size' exceeds the embedded capacity N.
  AutoBuffer(size_t size) : buffer(stackBuffer) {
    if (size > N)
      buffer = new T[size];
  }
  // Frees the heap array, if one was allocated.
  ~AutoBuffer() {
    if (buffer != stackBuffer)
      delete [] buffer;
  }
  // Pointer to the first element of whichever storage is in use.
  T* elems() {
    return buffer;
  }
  const T& operator[] (size_t i) const {
    return buffer[i];
  }
  T& operator[] (size_t i) {
    return buffer[i];
  }
 private:
  T stackBuffer[N];   // embedded storage used when size <= N
  T* buffer;          // stackBuffer, or a new[]-allocated array
  // Declared but not defined: no default construction, no copying.
  AutoBuffer();
  AutoBuffer(const AutoBuffer&);
  AutoBuffer& operator=(const AutoBuffer&);
};
910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------
912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TestTrieDictWithValue    Test trie dictionaries with logprob values and
914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// more than 2^16 nodes after compaction.
915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------
917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestTrieDictWithValue() {
918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode      status  = U_ZERO_ERROR;
919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Open and read the test data file.
922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *testDataDirectory = IntlTest::getSourceTestData(status);
924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *filename = "cjdict-truncated.txt";
925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char testFileName[1000];
926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (testDataDirectory == NULL || strlen(testDataDirectory) + strlen(filename) + 10 >= sizeof(testFileName)) {
927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Can't open test data.  Path too long.");
928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    strcpy(testFileName, testDataDirectory);
931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    strcat(testFileName, filename);
932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Items needing deleting at the end
934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    MutableTrieDictionary *mutableDict = NULL;
935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CompactTrieDictionary *compactDict = NULL;
936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet            *breaks      = NULL;
937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar                 *testFile    = NULL;
938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    StringEnumeration     *enumer1     = NULL;
939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    StringEnumeration     *enumer2     = NULL;
940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    MutableTrieDictionary *mutable2    = NULL;
941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    StringEnumeration     *cloneEnum   = NULL;
942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CompactTrieDictionary *compact2    = NULL;
943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    NumberFormat          *nf           = NULL;
944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UText *originalText = NULL, *cloneText = NULL;
945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *originalWord = NULL;
947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *cloneWord    = NULL;
948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar *current;
949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar *word;
950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar uc;
951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t wordLen;
952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t wordCount;
953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t testCount;
954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t valueLen;
955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int counter = 0;
956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int    len;
958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testFile = ReadAndConvertFile(testFileName, len, NULL, status);
959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup; /* something went wrong, error already output */
961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    mutableDict = new MutableTrieDictionary(0x0E1C, status, TRUE);
964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Error creating MutableTrieDictionary: %s\n", u_errorName(status));
966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breaks = new UnicodeSet;
970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breaks->add(0x000A);     // Line Feed
971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breaks->add(0x000D);     // Carriage Return
972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breaks->add(0x2028);     // Line Separator
973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breaks->add(0x2029);     // Paragraph Separator
974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breaks->add(0x0009);     // Tab character
975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Now add each non-comment line of the file as a word.
977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    current = testFile;
978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    word = current;
979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uc = *current++;
980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    wordLen = 0;
981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    wordCount = 0;
982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    nf = NumberFormat::createInstance(status);
983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (uc) {
985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString ucharValue;
986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        valueLen = 0;
987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (uc == 0x0023) {     // #comment line, skip
989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (uc && !breaks->contains(uc)) {
990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                uc = *current++;
991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else{
994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (uc && !breaks->contains(uc)) {
995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                ++wordLen;
996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                uc = *current++;
997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(uc == 0x0009){ //separator is a tab char, read in num after tab
999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                uc = *current++;
1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                while (uc && !breaks->contains(uc)) {
1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    ucharValue.append(uc);
1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    uc = *current++;
1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (wordLen > 0) {
1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            Formattable value((int32_t)0);
1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            nf->parse(ucharValue.getTerminatedBuffer(), value, status);
1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(U_FAILURE(status)){
1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("parsing of value failed when reading in dictionary\n");
1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                goto cleanup;
1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            mutableDict->addWord(word, wordLen, status, value.getLong());
1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (U_FAILURE(status)) {
1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("Could not add word to mutable dictionary; status %s\n", u_errorName(status));
1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                goto cleanup;
1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            wordCount += 1;
1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Find beginning of next line
1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (uc && breaks->contains(uc)) {
1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            uc = *current++;
1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        word = current-1;
1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        wordLen = 0;
1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wordCount < 50) {
1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Word count (%d) unreasonably small\n", wordCount);
1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer1 = mutableDict->openWords(status);
1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not open mutable dictionary enumerator: %s\n", u_errorName(status));
1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testCount = 0;
1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wordCount != (testCount = enumer1->count(status))) {
1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("MutableTrieDictionary word count (%d) differs from file word count (%d), with status %s\n",
1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                testCount, wordCount, u_errorName(status));
1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Now compact it
1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    compactDict = new CompactTrieDictionary(*mutableDict, status);
1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Failed to create CompactTrieDictionary: %s\n", u_errorName(status));
1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer2 = compactDict->openWords(status);
1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not open compact trie dictionary enumerator: %s\n", u_errorName(status));
1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //delete later
1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    writeEnumerationToFile(enumer1, "/home/jchye/mutable.txt");
1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    writeEnumerationToFile(enumer2, "/home/jchye/compact.txt");
1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer1->reset(status);
1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer2->reset(status);
1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    originalWord = enumer1->snext(status);
1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    cloneWord = enumer2->snext(status);
1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (U_SUCCESS(status) && originalWord != NULL && cloneWord != NULL) {
1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (*originalWord != *cloneWord) {
1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("MutableTrieDictionary and CompactTrieDictionary word mismatch at %d, lengths are %d and %d\n",
1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    counter, originalWord->length(), cloneWord->length());
1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            goto cleanup;
1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // check if attached values of the same word in both dictionaries tally
1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0
1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t lengths1[originalWord->length()], lengths2[cloneWord->length()];
1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uint16_t values1[originalWord->length()], values2[cloneWord->length()];
1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        AutoBuffer<int32_t, 20> lengths1(originalWord->length());
1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        AutoBuffer<int32_t, 20> lengths2(cloneWord->length());
1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        AutoBuffer<uint16_t, 20> values1(originalWord->length());
1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        AutoBuffer<uint16_t, 20> values2(cloneWord->length());
1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        originalText = utext_openConstUnicodeString(originalText, originalWord, &status);
1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        cloneText = utext_openConstUnicodeString(cloneText, cloneWord, &status);
1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int count1, count2;
1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        mutableDict->matches(originalText, originalWord->length(), lengths1.elems(), count1, originalWord->length(), values1.elems());
1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        compactDict->matches(cloneText, cloneWord->length(), lengths2.elems(), count2, cloneWord->length(), values2.elems());
1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(values1[count1-1] != values2[count2-1]){
1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Values of word %d in MutableTrieDictionary and CompactTrieDictionary do not match, with values %d and %d\n",
1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  counter, values1[count1-1], values2[count2-1]);
1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            goto cleanup;
1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        counter++;
1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        originalWord = enumer1->snext(status);
1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        cloneWord = enumer2->snext(status);
1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (enumer1->getDynamicClassID() == enumer2->getDynamicClassID()) {
1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("CompactTrieEnumeration and MutableTrieEnumeration ClassIDs are the same");
1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete enumer1;
1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer1 = NULL;
1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete enumer2;
1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer2 = NULL;
1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Now un-compact it
1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    mutable2 = compactDict->cloneMutable(status);
1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not clone CompactTrieDictionary to MutableTrieDictionary: %s\n", u_errorName(status));
1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    cloneEnum = mutable2->openWords(status);
1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not create cloned mutable enumerator: %s\n", u_errorName(status));
1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wordCount != (testCount = cloneEnum->count(status))) {
1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Cloned MutableTrieDictionary word count (%d) differs from file word count (%d), with status %s\n",
1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                testCount, wordCount, u_errorName(status));
1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Compact original dictionary to clone. Note that we can only compare the same kind of
1134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // dictionary as the order of the enumerators is not guaranteed to be the same between
1135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // different kinds
1136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer1 = mutableDict->openWords(status);
1137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not re-open mutable dictionary enumerator: %s\n", u_errorName(status));
1139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    counter = 0;
1143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    originalWord = enumer1->snext(status);
1144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    cloneWord = cloneEnum->snext(status);
1145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (U_SUCCESS(status) && originalWord != NULL && cloneWord != NULL) {
1146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (*originalWord != *cloneWord) {
1147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Original and cloned MutableTrieDictionary word mismatch\n");
1148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            goto cleanup;
1149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // check if attached values of the same word in both dictionaries tally
1152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        AutoBuffer<int32_t, 20> lengths1(originalWord->length());
1153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        AutoBuffer<int32_t, 20> lengths2(cloneWord->length());
1154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        AutoBuffer<uint16_t, 20> values1(originalWord->length());
1155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        AutoBuffer<uint16_t, 20> values2(cloneWord->length());
1156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        originalText = utext_openConstUnicodeString(originalText, originalWord, &status);
1157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        cloneText = utext_openConstUnicodeString(cloneText, cloneWord, &status);
1158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int count1, count2;
1160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        mutableDict->matches(originalText, originalWord->length(), lengths1.elems(), count1, originalWord->length(), values1.elems());
1161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        mutable2->matches(cloneText, cloneWord->length(), lengths2.elems(), count2, cloneWord->length(), values2.elems());
1162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(values1[count1-1] != values2[count2-1]){
1164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Values of word %d in original and cloned MutableTrieDictionary do not match, with values %d and %d\n",
1165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  counter, values1[count1-1], values2[count2-1]);
1166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            goto cleanup;
1167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        counter++;
1170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        originalWord = enumer1->snext(status);
1172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        cloneWord = cloneEnum->snext(status);
1173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Enumeration failed: %s\n", u_errorName(status));
1177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (originalWord != cloneWord) {
1181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Original and cloned MutableTrieDictionary ended enumeration at different points\n");
1182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Test the data copying constructor for CompactTrieDict, and the data access APIs.
1186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    compact2 = new CompactTrieDictionary(compactDict->data(), status);
1187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("CompactTrieDictionary(const void *,...) failed\n");
1189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (compact2->dataSize() == 0) {
1193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("CompactTrieDictionary->dataSize() == 0\n");
1194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Now count the words via the second dictionary
1198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete enumer1;
1199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumer1 = compact2->openWords(status);
1200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Could not open compact trie dictionary 2 enumerator: %s\n", u_errorName(status));
1202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wordCount != (testCount = enumer1->count(status))) {
1206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("CompactTrieDictionary 2 word count (%d) differs from file word count (%d), with status %s\n",
1207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                testCount, wordCount, u_errorName(status));
1208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanup;
1209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    cleanup:
1212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete compactDict;
1213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete mutableDict;
1214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete breaks;
1215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete[] testFile;
1216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete enumer1;
1217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete mutable2;
1218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete cloneEnum;
1219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete compact2;
1220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_close(originalText);
1221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_close(cloneText);
1222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------
1227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// generalIteratorTest      Given a break iterator and a set of test data,
1229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                          Run the tests and report the results.
1230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------
1232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::generalIteratorTest(RuleBasedBreakIterator& bi, BITestData &td)
1233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi.setText(td.fDataToBreak);
1236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testFirstAndNext(bi, td);
1238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testLastAndPrevious(bi, td);
1240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testFollowing(bi, td);
1242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testPreceding(bi, td);
1243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testIsBoundary(bi, td);
1244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doMultipleSelectionTest(bi, td);
1245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   testFirstAndNext.   Run the iterator forwards in the obvious first(), next()
1250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                       kind of loop.
1251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::testFirstAndNext(RuleBasedBreakIterator& bi, BITestData &td)
1253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
1255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     p;
1256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     lastP = -1;
1257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     tag;
1258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("Test first and next");
1260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi.setText(td.fDataToBreak);
1261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.clearResults();
1262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (p=bi.first(); p!=RuleBasedBreakIterator::DONE; p=bi.next()) {
1264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        td.fActualBreakPositions.addElement(p, status);  // Save result.
1265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        tag = bi.getRuleStatus();
1266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        td.fActualTags.addElement(tag, status);
1267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p <= lastP) {
1268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // If the iterator is not making forward progress, stop.
1269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //  No need to raise an error here, it'll be detected in the normal check of results.
1270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
1271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        lastP = p;
1273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.checkResults("testFirstAndNext", this);
1275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  TestLastAndPrevious.   Run the iterator backwards, starting with last().
1280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void  RBBITest::testLastAndPrevious(RuleBasedBreakIterator& bi,  BITestData &td)
1282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
1284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     p;
1285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     lastP  = 0x7ffffffe;
1286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     tag;
1287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("Test last and previous");
1289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi.setText(td.fDataToBreak);
1290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.clearResults();
1291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (p=bi.last(); p!=RuleBasedBreakIterator::DONE; p=bi.previous()) {
1293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Save break position.  Insert it at start of vector of results, shoving
1294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    already-saved results further towards the end.
1295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        td.fActualBreakPositions.insertElementAt(p, 0, status);
1296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // bi.previous();   // TODO:  Why does this fix things up????
1297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // bi.next();
1298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        tag = bi.getRuleStatus();
1299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        td.fActualTags.insertElementAt(tag, 0, status);
1300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p >= lastP) {
1301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // If the iterator is not making progress, stop.
1302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //  No need to raise an error here, it'll be detected in the normal check of results.
1303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
1304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        lastP = p;
1306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.checkResults("testLastAndPrevious", this);
1308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::testFollowing(RuleBasedBreakIterator& bi, BITestData &td)
1312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
1314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     p;
1315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     tag;
1316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     lastP  = -2;     // A value that will never be returned as a break position.
1317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                 //   cannot be -1; that is returned for DONE.
1318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int         i;
1319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("testFollowing():");
1321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi.setText(td.fDataToBreak);
1322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.clearResults();
1323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Save the starting point, since we won't get that out of following.
1325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    p = bi.first();
1326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.fActualBreakPositions.addElement(p, status);  // Save result.
1327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    tag = bi.getRuleStatus();
1328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.fActualTags.addElement(tag, status);
1329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = 0; i <= td.fDataToBreak.length()+1; i++) {
1331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p = bi.following(i);
1332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p != lastP) {
1333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (p == RuleBasedBreakIterator::DONE) {
1334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // We've reached a new break position.  Save it.
1337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            td.fActualBreakPositions.addElement(p, status);  // Save result.
1338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            tag = bi.getRuleStatus();
1339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            td.fActualTags.addElement(tag, status);
1340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lastP = p;
1341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // The loop normally exits by means of the break in the middle.
1344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Make sure that the index was at the correct position for the break iterator to have
1345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   returned DONE.
1346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (i != td.fDataToBreak.length()) {
1347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("testFollowing():  iterator returned DONE prematurely.");
1348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Full check of all results.
1351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.checkResults("testFollowing", this);
1352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::testPreceding(RuleBasedBreakIterator& bi,  BITestData &td) {
1357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
1358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     p;
1359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     tag;
1360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     lastP  = 0x7ffffffe;
1361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int         i;
1362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("testPreceding():");
1364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi.setText(td.fDataToBreak);
1365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.clearResults();
1366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    p = bi.last();
1368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.fActualBreakPositions.addElement(p, status);
1369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    tag = bi.getRuleStatus();
1370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.fActualTags.addElement(tag, status);
1371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = td.fDataToBreak.length(); i>=-1; i--) {
1373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p = bi.preceding(i);
1374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p != lastP) {
1375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (p == RuleBasedBreakIterator::DONE) {
1376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // We've reached a new break position.  Save it.
1379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            td.fActualBreakPositions.insertElementAt(p, 0, status);
1380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lastP = p;
1381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            tag = bi.getRuleStatus();
1382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            td.fActualTags.insertElementAt(tag, 0, status);
1383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // The loop normally exits by means of the break in the middle.
1386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Make sure that the index was at the correct position for the break iterator to have
1387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   returned DONE.
1388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (i != 0) {
1389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("testPreceding():  iterator returned DONE prematurely.");
1390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Full check of all results.
1393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.checkResults("testPreceding", this);
1394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::testIsBoundary(RuleBasedBreakIterator& bi,  BITestData &td) {
1399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
1400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int         i;
1401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     tag;
1402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("testIsBoundary():");
1404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi.setText(td.fDataToBreak);
1405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.clearResults();
1406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = 0; i <= td.fDataToBreak.length(); i++) {
1408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (bi.isBoundary(i)) {
1409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            td.fActualBreakPositions.addElement(i, status);  // Save result.
1410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            tag = bi.getRuleStatus();
1411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            td.fActualTags.addElement(tag, status);
1412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    td.checkResults("testIsBoundary: ", this);
1415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::doMultipleSelectionTest(RuleBasedBreakIterator& iterator, BITestData &td)
1420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    iterator.setText(td.fDataToBreak);
1422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator* testIterator =(RuleBasedBreakIterator*)iterator.clone();
1424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t offset = iterator.first();
1425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t testOffset;
1426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t count = 0;
1427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("doMultipleSelectionTest text of length: %d", td.fDataToBreak.length());
1429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (*testIterator != iterator)
1431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("clone() or operator!= failed: two clones compared unequal");
1432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    do {
1434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        testOffset = testIterator->first();
1435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        testOffset = testIterator->next(count);
1436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (offset != testOffset)
1437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset);
1438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (offset != RuleBasedBreakIterator::DONE) {
1440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            count++;
1441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            offset = iterator.next();
1442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (offset != RuleBasedBreakIterator::DONE && *testIterator == iterator) {
1444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("operator== failed: Two unequal iterators compared equal. count=%d offset=%d", count, offset);
1445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (count > 10000 || offset == -1) {
1446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    errln("operator== failed too many times. Stopping test.");
1447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if (offset == -1) {
1448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        errln("Does (RuleBasedBreakIterator::DONE == -1)?");
1449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
1450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return;
1451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } while (offset != RuleBasedBreakIterator::DONE);
1455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // now do it backwards...
1457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    offset = iterator.last();
1458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    count = 0;
1459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    do {
1461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        testOffset = testIterator->last();
1462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        testOffset = testIterator->next(count);   // next() with a negative arg is same as previous
1463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (offset != testOffset)
1464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset);
1465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (offset != RuleBasedBreakIterator::DONE) {
1467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            count--;
1468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            offset = iterator.previous();
1469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } while (offset != RuleBasedBreakIterator::DONE);
1471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete testIterator;
1473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------
1477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     other tests
1479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------
1481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestEmptyString()
1482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString text = "";
1484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
1485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BITestData x(status);
1487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ADD_DATACHUNK(x, "", 0, status);           // Break at start of data
1488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator* bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
1489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status))
1490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Failed to create the BreakIterator for default locale in TestEmptyString. - %s", u_errorName(status));
1492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    generalIteratorTest(*bi, x);
1495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
1496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestGetAvailableLocales()
1499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t locCount = 0;
1501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const Locale* locList = BreakIterator::getAvailableLocales(locCount);
1502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (locCount == 0)
1504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln("getAvailableLocales() returned an empty list!");
1505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Just make sure that it's returning good memory.
1506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i;
1507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = 0; i < locCount; ++i) {
1508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln(locList[i].getName());
1509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//Testing the BreakIterator::getDisplayName() function
1513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestGetDisplayName()
1514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString   result;
1516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator::getDisplayName(Locale::getUS(), result);
1518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (Locale::getDefault() == Locale::getUS() && result != "English (United States)")
1519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln("BreakIterator::getDisplayName() failed: expected \"English (United States)\", got \""
1520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                + result);
1521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator::getDisplayName(Locale::getFrance(), Locale::getUS(), result);
1523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (result != "French (France)")
1524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln("BreakIterator::getDisplayName() failed: expected \"French (France)\", got \""
1525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                + result);
1526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Test End Behaviour
1529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @bug 4068137
1530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestEndBehaviour()
1532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
1534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString testString("boo.");
1535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *wb = BreakIterator::createWordInstance(Locale::getDefault(), status);
1536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status))
1537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Failed to create the BreakIterator for default locale in TestEndBehaviour. - %s", u_errorName(status));
1539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    wb->setText(testString);
1542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wb->first() != 0)
1544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Didn't get break at beginning of string.");
1545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wb->next() != 3)
1546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Didn't get break before period in \"boo.\"");
1547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (wb->current() != 4 && wb->next() != 4)
1548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Didn't get break at end of string.");
1549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete wb;
1550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
1552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @bug 4153072
1553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestBug4153072() {
1555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
1556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *iter = BreakIterator::createWordInstance(Locale::getDefault(), status);
1557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status))
1558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Failed to create the BreakIterator for default locale in TestBug4153072 - %s", u_errorName(status));
1560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString str("...Hello, World!...");
1563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t begin = 3;
1564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t end = str.length() - 3;
1565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool onBoundary;
1566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    StringCharacterIterator* textIterator = new StringCharacterIterator(str, begin, end, begin);
1568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    iter->adoptText(textIterator);
1569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int index;
1570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Note: with the switch to UText, there is no way to restrict the
1571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //       iteration range to begin at an index other than zero.
1572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //       String character iterators created with a non-zero bound are
1573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //         treated by RBBI as being empty.
1574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (index = -1; index < begin + 1; ++index) {
1575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        onBoundary = iter->isBoundary(index);
1576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (index == 0?  !onBoundary : onBoundary) {
1577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"Didn't handle isBoundary correctly with offset = " + index +
1578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            " and begin index = " + begin);
1579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete iter;
1582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Test for problem reported by Ashok Matoria on 9 July 2007
1587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    One.<kSoftHyphen><kSpace>Two.
1588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    Sentence break at start (0) and then on calling next() it breaks at
1590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   'T' of "Two". Now, at this point if I do next() and
1591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    then previous(), it breaks at <kSoftHyphen> instead of 'T' of "Two".
1592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestBug5775() {
    // Regression test: with an English sentence iterator over
    // "One.<U+00AD> Two.", next(), next(), previous() must end up back on the
    // position that the first next() reported.
1594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
1595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *bi = BreakIterator::createSentenceInstance(Locale::getEnglish(), status);
1596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT_SUCCESS(status);
1597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Check for status first for better handling of no data errors.
1601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT(bi != NULL);
1602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (bi == NULL) {
1603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
    // The literal holds the escape sequence as text; unescape() below turns it
    // into the actual character.  The digits under the literal give the index
    // of each character in the unescaped string.
1606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString s("One.\\u00ad Two.", -1, US_INV);
1607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //               01234      56789
1608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    s = s.unescape();
1609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi->setText(s);
    // First break after the start is expected at 6, the 'T' of "Two".
1610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int pos = bi->next();
1611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT(pos == 6);
    // Second break is expected at 10, one past the last indexed character.
1612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pos = bi->next();
1613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT(pos == 10);
    // The reported bug: previous() must return to 6, not to the soft hyphen at 4.
1614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pos = bi->previous();
1615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT(pos == 6);
1616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
1617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Test Japanese Line Break
1623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @bug 4095322
1624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestJapaneseLineBreak()
1626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
    // NOTE: everything between "#if 0" and "#endif" is compiled out, so as
    //       built this test has an empty body and checks nothing.
1627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0
1628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Test needs updating some more...   Dump it for now.
1629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Change for Unicode TR 14:  Punctuation characters with categories Pi and Pf do not count
1632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //        as opening and closing punctuation for line breaking.
1633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //        Also, \u30fc and \u30fe are not counted as hyphens.   Remove these chars
1634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //        from these tests.    6-13-2002
1635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
    // Three-character template; the middle character (index 1) is overwritten
    // with each character under test in the loops below.
1637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString testString = CharsToUnicodeString("\\u4e00x\\u4e8c");
    // In both lists the commented-out literal is the older character set and
    // the literal on the following line is the one actually used.
1638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString precedingChars = CharsToUnicodeString(
1639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //"([{\\u00ab$\\u00a5\\u00a3\\u00a4\\u2018\\u201a\\u201c\\u201e\\u201b\\u201f");
1640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "([{$\\u00a5\\u00a3\\u00a4\\u201a\\u201e");
1641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString followingChars = CharsToUnicodeString(
1642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // ")]}\\u00bb!%,.\\u3001\\u3002\\u3063\\u3083\\u3085\\u3087\\u30c3\\u30e3\\u30e5\\u30e7\\u30fc"
1643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ")]}!%,.\\u3001\\u3002\\u3063\\u3083\\u3085\\u3087\\u30c3\\u30e3\\u30e5\\u30e7"
1644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // ":;\\u309b\\u309c\\u3005\\u309d\\u309e\\u30fd\\u30fe\\u2019\\u201d\\u00b0\\u2032\\u2033\\u2034"
1645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ":;\\u309b\\u309c\\u3005\\u309d\\u309e\\u30fd\\u00b0\\u2032\\u2033\\u2034"
1646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u2030\\u2031\\u2103\\u2109\\u00a2\\u0300\\u0301\\u0302");
1647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *iter = BreakIterator::createLineInstance(Locale::getJapan(), status);
1648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i;
1650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status))
1651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Failed to create the BreakIterator for Japanese locale in TestJapaneseLineBreak.\n");
1653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
    // Each "preceding" character in the middle slot: breaks are expected at
    // 0, 1 and 3, i.e. before the character but not after it.
1656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = 0; i < precedingChars.length(); i++) {
1657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        testString.setCharAt(1, precedingChars[i]);
1658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        iter->setText(testString);
1659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t j = iter->first();
1660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (j != 0)
1661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("ja line break failure: failed to start at 0");
1662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        j = iter->next();
1663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (j != 1)
1664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("ja line break failure: failed to stop before '" + UCharToUnicodeString(precedingChars[i])
1665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        + "' (" + ((int)(precedingChars[i])) + ")");
1666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        j = iter->next();
1667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (j != 3)
1668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("ja line break failure: failed to skip position after '" + UCharToUnicodeString(precedingChars[i])
1669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        + "' (" + ((int)(precedingChars[i])) + ")");
1670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
    // Each "following" character in the middle slot: breaks are expected at
    // 0, 2 and 3, i.e. after the character but not before it.
1672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = 0; i < followingChars.length(); i++) {
1673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        testString.setCharAt(1, followingChars[i]);
1674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        iter->setText(testString);
1675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int j = iter->first();
1676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (j != 0)
1677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("ja line break failure: failed to start at 0");
1678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        j = iter->next();
1679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (j != 2)
1680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("ja line break failure: failed to skip position before '" + UCharToUnicodeString(followingChars[i])
1681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        + "' (" + ((int)(followingChars[i])) + ")");
1682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        j = iter->next();
1683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (j != 3)
1684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("ja line break failure: failed to stop after '" + UCharToUnicodeString(followingChars[i])
1685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        + "' (" + ((int)(followingChars[i])) + ")");
1686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete iter;
1688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
1689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------
1693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   RBBITest::Extended    Run  RBBI Tests from an external test data file
1695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------
1697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Inputs for one call to RBBITest::executeTest(): the iterator under test,
// the text to run it over, and per-position expectations and source locations.
1698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)struct TestParams {
    // Iterator under test; executeTest() returns immediately when this is NULL.
1699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator   *bi;
    // Text handed to bi->setText() before iterating.
1700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString    dataToBreak;
    // Indexed by position in dataToBreak.  0 means no break is expected there;
    // any other value means a break is expected and is compared with the
    // iterator's rule status (-1 is compared as status 0).
1701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector32       *expectedBreaks;
    // Indexed by position in dataToBreak; printed as the "File line,col"
    // values in executeTest() error messages.
1702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector32       *srcLine;
1703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector32       *srcCol;
1704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
1705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
//
//  RBBITest::executeTest   Check one break iterator against the expectations in *t.
//
//     Sets t->dataToBreak as the iterator's text, then walks it forward with
//     first()/next() and backward with last()/previous().  In each direction
//     an error is logged when
//        - the iterator reports the same position twice in a row
//          (that walk is then abandoned),
//        - a position with a non-zero entry in t->expectedBreaks is skipped,
//        - a reported position has a zero entry in t->expectedBreaks, or
//        - getRuleStatus() differs from the expected entry (-1 is compared as 0).
//     Does nothing when t->bi is NULL.
//
1706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::executeTest(TestParams *t) {
1707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    bp;
1708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    prevBP;
1709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    i;
1710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (t->bi == NULL) {
1712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    t->bi->setText(t->dataToBreak);
1716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Run the iterator forward
1718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
    // prevBP starts at -1 so that the gap scan below begins at index 0.
1719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    prevBP = -1;
1720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) {
1721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (prevBP ==  bp) {
1722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Fail for lack of forward progress.
1723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Forward Iteration, no forward progress.  Break Pos=%4d  File line,col=%4d,%4d",
1724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));
1725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
1726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Check that we didn't miss an expected break between the last one
1729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //  and this one.
1730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i=prevBP+1; i<bp; i++) {
1731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (t->expectedBreaks->elementAti(i) != 0) {
1732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                int expected[] = {0, i};
1733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                printStringBreaks(t->dataToBreak, expected, 2);
1734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("Forward Iteration, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",
1735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));
1736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Check that the break we did find was expected
1740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (t->expectedBreaks->elementAti(bp) == 0) {
1741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int expected[] = {0, bp};
1742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            printStringBreaks(t->dataToBreak, expected, 2);
1743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Forward Iteration, break found, but not expected.  Pos=%4d  File line,col= %4d,%4d",
1744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));
1745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // The break was expected.
1747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   Check that the {nnn} tag value is correct.
1748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t expectedTagVal = t->expectedBreaks->elementAti(bp);
            // An entry of -1 marks an expected break whose status should be 0.
1749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (expectedTagVal == -1) {
1750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                expectedTagVal = 0;
1751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t line = t->srcLine->elementAti(bp);
1753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();
1754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (rs != expectedTagVal) {
1755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("Incorrect status for forward break.  Pos=%4d  File line,col= %4d,%4d.\n"
1756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      "          Actual, Expected status = %4d, %4d",
1757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal);
1758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        prevBP = bp;
1763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Verify that there were no missed expected breaks after the last one found
1766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i=prevBP+1; i<t->expectedBreaks->size(); i++) {
1767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (t->expectedBreaks->elementAti(i) != 0) {
1768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Forward Iteration, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",
1769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));
1770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Run the iterator backwards, verify that the same breaks are found.
1775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    prevBP = t->dataToBreak.length()+2;  // start with a phony value for the last break pos seen.
1777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous()) {
1778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (prevBP ==  bp) {
1779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Fail for lack of progress.
1780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Reverse Iteration, no progress.  Break Pos=%4d  File line,col=%4d,%4d",
1781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));
1782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
1783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Check that we didn't miss an expected break between the last one
1786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //  and this one.  (UVector returns zeros for index out of bounds.)
1787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i=prevBP-1; i>bp; i--) {
1788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (t->expectedBreaks->elementAti(i) != 0) {
1789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("Reverse Iteration, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",
1790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));
1791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Check that the break we did find was expected
1795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (t->expectedBreaks->elementAti(bp) == 0) {
1796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Reverse Iteration, break found, but not expected.  Pos=%4d  File line,col= %4d,%4d",
1797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                   bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp));
1798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // The break was expected.
1800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   Check that the {nnn} tag value is correct.
1801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t expectedTagVal = t->expectedBreaks->elementAti(bp);
            // An entry of -1 marks an expected break whose status should be 0.
1802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (expectedTagVal == -1) {
1803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                expectedTagVal = 0;
1804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int line = t->srcLine->elementAti(bp);
1806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();
1807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (rs != expectedTagVal) {
1808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("Incorrect status for reverse break.  Pos=%4d  File line,col= %4d,%4d.\n"
1809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      "          Actual, Expected status = %4d, %4d",
1810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal);
1811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        prevBP = bp;
1815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Verify that there were no missed breaks prior to the last one found
    // (this is still the reverse pass, so the message must say "Reverse").
1818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i=prevBP-1; i>=0; i--) {
1819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (t->expectedBreaks->elementAti(i) != 0) {
1820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Reverse Iteration, break expected, but not found.  Pos=%4d  File line,col= %4d,%4d",
1821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      i, t->srcLine->elementAti(i), t->srcCol->elementAti(i));
1822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestExtended() {
1828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS
1829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode      status  = U_ZERO_ERROR;
1830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Locale          locale("");
1831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString       rules;
1833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TestParams          tp;
1834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    tp.bi             = NULL;
1835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    tp.expectedBreaks = new UVector32(status);
1836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    tp.srcLine        = new UVector32(status);
1837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    tp.srcCol         = new UVector32(status);
1838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher      localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_]*) *>"), 0, status);
1840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status));
1842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Open and read the test data file.
1847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *testDataDirectory = IntlTest::getSourceTestData(status);
1849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char testFileName[1000];
1850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) {
1851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Can't open test data.  Path too long.");
1852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    strcpy(testFileName, testDataDirectory);
1855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    strcat(testFileName, "rbbitst.txt");
1856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int    len;
1858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status);
1859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return; /* something went wrong, error already output */
1861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Put the test data into a UnicodeString
1868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString testString(FALSE, testFile, len);
1870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enum EParseState{
1872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        PARSE_COMMENT,
1873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        PARSE_TAG,
1874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        PARSE_DATA,
1875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        PARSE_NUM
1876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    parseState = PARSE_TAG;
1878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    EParseState savedState = PARSE_TAG;
1880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar CH_LF        = 0x0a;
1882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar CH_CR        = 0x0d;
1883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar CH_HASH      = 0x23;
1884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*static const UChar CH_PERIOD    = 0x2e;*/
1885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar CH_LT        = 0x3c;
1886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar CH_GT        = 0x3e;
1887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar CH_BACKSLASH = 0x5c;
1888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar CH_BULLET    = 0x2022;
1889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    lineNum  = 1;
1891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    colStart = 0;
1892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    column   = 0;
1893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    charIdx  = 0;
1894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    tagValue = 0;       // The numeric value of a <nnn> tag.
1896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (charIdx = 0; charIdx < len; ) {
1898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_ZERO_ERROR;
1899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar  c = testString.charAt(charIdx);
1900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        charIdx++;
1901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (c == CH_CR && charIdx<len && testString.charAt(charIdx) == CH_LF) {
1902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // treat CRLF as a unit
1903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c = CH_LF;
1904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            charIdx++;
1905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (c == CH_LF || c == CH_CR) {
1907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lineNum++;
1908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            colStart = charIdx;
1909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        column = charIdx - colStart + 1;
1911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        switch (parseState) {
1913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case PARSE_COMMENT:
1914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (c == 0x0a || c == 0x0d) {
1915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                parseState = savedState;
1916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
1918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case PARSE_TAG:
1920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            {
1921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (c == CH_HASH) {
1922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                parseState = PARSE_COMMENT;
1923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                savedState = PARSE_TAG;
1924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (u_isUWhiteSpace(c)) {
1927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testString.compare(charIdx-1, 6, "<word>") == 0) {
1930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                delete tp.bi;
1931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.bi = BreakIterator::createWordInstance(locale,  status);
1932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                charIdx += 5;
1933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testString.compare(charIdx-1, 6, "<char>") == 0) {
1936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                delete tp.bi;
1937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.bi = BreakIterator::createCharacterInstance(locale,  status);
1938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                charIdx += 5;
1939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testString.compare(charIdx-1, 6, "<line>") == 0) {
1942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                delete tp.bi;
1943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.bi = BreakIterator::createLineInstance(locale,  status);
1944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                charIdx += 5;
1945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testString.compare(charIdx-1, 6, "<sent>") == 0) {
1948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                delete tp.bi;
1949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.bi = NULL;
1950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.bi = BreakIterator::createSentenceInstance(locale,  status);
1951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                charIdx += 5;
1952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testString.compare(charIdx-1, 7, "<title>") == 0) {
1955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                delete tp.bi;
1956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.bi = BreakIterator::createTitleInstance(locale,  status);
1957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                charIdx += 6;
1958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // <locale  loc_name>
1962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            localeMatcher.reset(testString);
1963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (localeMatcher.lookingAt(charIdx-1, status)) {
1964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UnicodeString localeName = localeMatcher.group(1, status);
1965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                char localeName8[100];
1966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                localeName.extract(0, localeName.length(), localeName8, sizeof(localeName8), 0);
1967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                locale = Locale::createFromName(localeName8);
1968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                charIdx += localeMatcher.group(0, status).length();
1969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT_SUCCESS(status);
1970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testString.compare(charIdx-1, 6, "<data>") == 0) {
1973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                parseState = PARSE_DATA;
1974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                charIdx += 5;
1975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.dataToBreak = "";
1976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.expectedBreaks->removeAllElements();
1977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcCol ->removeAllElements();
1978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcLine->removeAllElements();
1979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("line %d: Tag expected in test file.", lineNum);
1983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            parseState = PARSE_COMMENT;
1984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            savedState = PARSE_DATA;
1985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            goto end_test; // Stop the test.
1986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
1988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case PARSE_DATA:
1990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (c == CH_BULLET) {
1991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                int32_t  breakIdx = tp.dataToBreak.length();
1992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.expectedBreaks->setSize(breakIdx+1);
1993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.expectedBreaks->setElementAt(-1, breakIdx);
1994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcLine->setSize(breakIdx+1);
1995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcLine->setElementAt(lineNum, breakIdx);
1996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcCol ->setSize(breakIdx+1);
1997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcCol ->setElementAt(column, breakIdx);
1998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testString.compare(charIdx-1, 7, "</data>") == 0) {
2002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Add final entry to mappings from break location to source file position.
2003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //  Need one extra because last break position returned is after the
2004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //    last char in the data, not at the last char.
2005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcLine->addElement(lineNum, status);
2006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcCol ->addElement(column, status);
2007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                parseState = PARSE_TAG;
2009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                charIdx += 6;
2010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // RUN THE TEST!
2012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                executeTest(&tp);
2013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) {
2017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Named character, e.g. \N{COMBINING GRAVE ACCENT}
2018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Get the code point from the name and insert it into the test data.
2019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //   (Damn, no API takes names in Unicode  !!!
2020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //    we've got to take it back to char *)
2021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, charIdx);
2022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                int32_t nameLength = nameEndIdx - (charIdx+2);
2023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                char charNameBuf[200];
2024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UChar32 theChar = -1;
2025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (nameEndIdx != -1) {
2026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UErrorCode status = U_ZERO_ERROR;
2027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    testString.extract(charIdx+2, nameLength, charNameBuf, sizeof(charNameBuf));
2028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    charNameBuf[sizeof(charNameBuf)-1] = 0;
2029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    theChar = u_charFromName(U_UNICODE_CHAR_NAME, charNameBuf, &status);
2030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if (U_FAILURE(status)) {
2031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        theChar = -1;
2032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (theChar == -1) {
2035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    errln("Error in named character in test file at line %d, col %d",
2036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        lineNum, column);
2037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                } else {
2038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // Named code point was recognized.  Insert it
2039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    //   into the test data.
2040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    tp.dataToBreak.append(theChar);
2041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    while (tp.dataToBreak.length() > tp.srcLine->size()) {
2042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        tp.srcLine->addElement(lineNum, status);
2043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        tp.srcCol ->addElement(column, status);
2044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (nameEndIdx > charIdx) {
2047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    charIdx = nameEndIdx+1;
2048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testString.compare(charIdx-1, 2, "<>") == 0) {
2057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                charIdx++;
2058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                int32_t  breakIdx = tp.dataToBreak.length();
2059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.expectedBreaks->setSize(breakIdx+1);
2060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.expectedBreaks->setElementAt(-1, breakIdx);
2061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcLine->setSize(breakIdx+1);
2062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcLine->setElementAt(lineNum, breakIdx);
2063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcCol ->setSize(breakIdx+1);
2064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcCol ->setElementAt(column, breakIdx);
2065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (c == CH_LT) {
2069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tagValue   = 0;
2070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                parseState = PARSE_NUM;
2071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (c == CH_HASH && column==3) {   // TODO:  why is column off so far?
2075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                parseState = PARSE_COMMENT;
2076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                savedState = PARSE_DATA;
2077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (c == CH_BACKSLASH) {
2081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Check for \ at end of line, a line continuation.
2082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //     Advance over (discard) the newline
2083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UChar32 cp = testString.char32At(charIdx);
2084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (cp == CH_CR && charIdx<len && testString.charAt(charIdx+1) == CH_LF) {
2085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // We have a CR LF
2086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    //  Need an extra increment of the input ptr to move over both of them
2087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    charIdx++;
2088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (cp == CH_LF || cp == CH_CR) {
2090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    lineNum++;
2091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    colStart = charIdx;
2092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    charIdx++;
2093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    break;
2094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Let unescape handle the back slash.
2097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                cp = testString.unescapeAt(charIdx);
2098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (cp != -1) {
2099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // Escape sequence was recognized.  Insert the char
2100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    //   into the test data.
2101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    tp.dataToBreak.append(cp);
2102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    while (tp.dataToBreak.length() > tp.srcLine->size()) {
2103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        tp.srcLine->addElement(lineNum, status);
2104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        tp.srcCol ->addElement(column, status);
2105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    break;
2107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Not a recognized backslash escape sequence.
2111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Take the next char as a literal.
2112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //  TODO:  Should this be an error?
2113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c = testString.charAt(charIdx);
2114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                charIdx = testString.moveIndex32(charIdx, 1);
2115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Normal, non-escaped data char.
2118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            tp.dataToBreak.append(c);
2119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Save the mapping from offset in the data to line/column numbers in
2121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   the original input file.  Will be used for better error messages only.
2122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   If there's an expected break before this char, the slot in the mapping
2123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //     vector will already be set for this char; don't overwrite it.
2124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (tp.dataToBreak.length() > tp.srcLine->size()) {
2125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcLine->addElement(lineNum, status);
2126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcCol ->addElement(column, status);
2127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
2129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case PARSE_NUM:
2132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // We are parsing an expected numeric tag value, like <1234>,
2133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   within a chunk of data.
2134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (u_isUWhiteSpace(c)) {
2135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (c == CH_GT) {
2139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Finished the number.  Add the info to the expected break data,
2140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //   and switch parse state back to doing plain data.
2141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                parseState = PARSE_DATA;
2142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (tagValue == 0) {
2143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    tagValue = -1;
2144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                int32_t  breakIdx = tp.dataToBreak.length();
2146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.expectedBreaks->setSize(breakIdx+1);
2147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.expectedBreaks->setElementAt(tagValue, breakIdx);
2148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcLine->setSize(breakIdx+1);
2149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcLine->setElementAt(lineNum, breakIdx);
2150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcCol ->setSize(breakIdx+1);
2151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tp.srcCol ->setElementAt(column, breakIdx);
2152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (u_isdigit(c)) {
2156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tagValue = tagValue*10 + u_charDigitValue(c);
2157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Syntax Error in test file at line %d, col %d",
2161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                lineNum, column);
2162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            parseState = PARSE_COMMENT;
2163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            goto end_test; // Stop the test
2164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
2165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(status)) {
2169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("ICU Error %s while parsing test file at line %d.",
2170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                u_errorName(status), lineNum);
2171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            status = U_ZERO_ERROR;
2172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            goto end_test; // Stop the test
2173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)end_test:
2178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete tp.bi;
2179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete tp.expectedBreaks;
2180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete tp.srcLine;
2181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete tp.srcCol;
2182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete [] testFile;
2183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
2184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestThaiBreaks() {
2187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status=U_ZERO_ERROR;
2188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator* b;
2189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Locale locale = Locale("th");
2190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t p, index;
2191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar c[]= {
2192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B,
2193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19,
2194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            0x0E16, 0x0E49, 0x0E33, 0x0000
2195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
2196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t expectedWordResult[] = {
2197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            2, 3, 6, 10, 11, 15, 17, 20, 22
2198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
2199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t expectedLineResult[] = {
2200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            3, 6, 11, 15, 17, 20, 22
2201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
2202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t size = u_strlen(c);
2204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString text=UnicodeString(c);
2205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    b = BreakIterator::createWordInstance(locale, status);
2207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
2208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Unable to create thai word break iterator. - %s", u_errorName(status));
2209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
2210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    b->setText(text);
2212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    p = index = 0;
2213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while ((p=b->next())!=BreakIterator::DONE && p < size) {
2214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p != expectedWordResult[index++]) {
2215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Incorrect break given by thai word break iterator. Expected: %d  Got: %d", expectedWordResult[index-1], p);
2216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete b;
2219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    b = BreakIterator::createLineInstance(locale, status);
2221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
2222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        printf("Unable to create thai line break iterator.\n");
2223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
2224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    b->setText(text);
2226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    p = index = 0;
2227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while ((p=b->next())!=BreakIterator::DONE && p < size) {
2228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p != expectedLineResult[index++]) {
2229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Incorrect break given by thai line break iterator. Expected: %d  Got: %d", expectedLineResult[index-1], p);
2230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete b;
2234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Tailored-break test data: UBRK_WORD, locale "en_US_POSIX".
// In this tailoring words do not include colon or period (cldrbug #1969), so
// the tailored list has boundaries around the ':' in "xx:yy" (23, 24) and the
// '.' in "struct.field" (36, 37) that the root list does not have.
static const char posxWordText[] =
    "Can't have breaks in xx:yy or struct.field for CS-types.";

// Boundaries expected from the en_US_POSIX word iterator.
static const int32_t posxWordTOffsets[] = {
    5, 6, 10, 11, 17, 18, 20, 21, 23, 24, 26, 27,
    29, 30, 36, 37, 42, 43, 46, 47, 49, 50, 55, 56
};

// Boundaries expected from the root word iterator.
static const int32_t posxWordROffsets[] = {
    5, 6, 10, 11, 17, 18, 20, 21, 26, 27,
    29, 30, 42, 43, 46, 47, 49, 50, 55, 56
};
2241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Tailored-break test data: UBRK_WORD, locale "ja".
// Intended behavior: no breaks inside runs of hiragana or runs of ideographs,
// where ideographs include \u3005 \u3007 \u303B (cldrbug #2009).
// The text is kept in \uXXXX-escaped form (28 characters once unescaped).
static const char jaWordText[] =
    "\\u79C1\\u9054\\u306B\\u4E00\\u3007\\u3007\\u3007\\u306E\\u30B3\\u30F3\\u30D4\\u30E5\\u30FC\\u30BF"
    "\\u304C\\u3042\\u308B\\u3002\\u5948\\u3005\\u306F\\u30EF\\u30FC\\u30C9\\u3067\\u3042\\u308B\\u3002";

// Disabled expectations, retained for reference only (never compiled).
#if 0
static const int32_t jaWordTOffsets[] = {    2, 3,          7, 8, 14,         17, 18,     20, 21, 24,         27, 28 };
static const int32_t jaWordROffsets[] = { 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28 };
#endif

// There is no separate Japanese word break iterator; root is the same as
// Japanese, which is why the two active lists below are identical.
// The dictionary-based iterator still has to be tweaked to better handle
// U+3005, U+3007, U+303B and some other cases.
// NOTE(review): the earlier wording of this note said U+300B, presumably a
// typo for the U+303B named in the comment at the top of this data set.
static const int32_t jaWordTOffsets[] = {
    1, 2, 3, 4, 5, 7, 8, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 25, 27, 28
};
static const int32_t jaWordROffsets[] = {
    1, 2, 3, 4, 5, 7, 8, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 25, 27, 28
};
2255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Tailored-break test data: UBRK_SENTENCE, locale "el".
// The Greek tailoring adds a break after the Greek question mark
// (cldrbug #2069); the tailored list therefore has two boundaries (8 and 14)
// that the root list lacks.
// The text is kept in \uXXXX-escaped form (36 characters once unescaped).
static const char elSentText[] =
    "\\u0391\\u03B2, \\u03B3\\u03B4; \\u0395 \\u03B6\\u03B7\\u037E \\u0398 \\u03B9\\u03BA. "
    "\\u039B\\u03BC \\u03BD\\u03BE! \\u039F\\u03C0, \\u03A1\\u03C2? \\u03A3";

// Boundaries expected from the "el" sentence iterator.
static const int32_t elSentTOffsets[] = {
    8, 14, 20, 27, 35, 36
};

// Boundaries expected from the root sentence iterator.
static const int32_t elSentROffsets[] = {
    20, 27, 35, 36
};
2262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Tailored-break test data: UBRK_CHARACTER, locale "th".
// Clusters should not include spacing Thai/Lao vowels (prefix or postfix),
// except for [SARA] AM (cldrbug #2161).
// The text is kept in \uXXXX-escaped form (41 characters once unescaped).
static const char thCharText[] =
    "\\u0E01\\u0E23\\u0E30\\u0E17\\u0E48\\u0E2D\\u0E21\\u0E23\\u0E08\\u0E19\\u0E32 "
    "(\\u0E2A\\u0E38\\u0E0A\\u0E32\\u0E15\\u0E34-\\u0E08\\u0E38\\u0E11\\u0E32\\u0E21\\u0E32\\u0E28) "
    "\\u0E40\\u0E14\\u0E47\\u0E01\\u0E21\\u0E35\\u0E1B\\u0E31\\u0E0D\\u0E2B\\u0E32 ";

// Boundaries expected from the "th" character iterator.
static const int32_t thCharTOffsets[] = {
    1, 2, 3, 5, 6, 7, 8, 9, 10, 11,
    12, 13, 15, 16, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28,
    29, 30, 32, 33, 35, 37, 38, 39, 40, 41
};

// Boundaries expected from the root character iterator (a subset of the
// tailored list: the root rules produce fewer cluster boundaries here).
static const int32_t thCharROffsets[] = {
    1, 3, 5, 6, 7, 8, 9, 11,
    12, 13, 15, 17, 19, 20, 22, 24, 26, 27, 28,
    29, 32, 33, 35, 37, 38, 40, 41
};
2274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct {
2276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBreakIteratorType  type;
2277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *        locale;
2278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *        escapedText;
2279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const int32_t *     tailoredOffsets;
2280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             tailoredOffsetsCount;
2281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const int32_t *     rootOffsets;
2282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             rootOffsetsCount;
2283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} TailoredBreakItem;
2284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define ARRAY_PTR_LEN(array) (array),(sizeof(array)/sizeof(array[0]))
2286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const TailoredBreakItem tbItems[] = {
2288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    { UBRK_WORD,      "en_US_POSIX", posxWordText, ARRAY_PTR_LEN(posxWordTOffsets), ARRAY_PTR_LEN(posxWordROffsets) },
2289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    { UBRK_WORD,      "ja",          jaWordText,   ARRAY_PTR_LEN(jaWordTOffsets),   ARRAY_PTR_LEN(jaWordROffsets)   },
2290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    { UBRK_SENTENCE,  "el",          elSentText,   ARRAY_PTR_LEN(elSentTOffsets),   ARRAY_PTR_LEN(elSentROffsets)   },
2291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    { UBRK_CHARACTER, "th",          thCharText,   ARRAY_PTR_LEN(thCharTOffsets),   ARRAY_PTR_LEN(thCharROffsets)   },
2292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    { UBRK_CHARACTER, NULL,          NULL,         NULL,0,                          NULL,0                          } // terminator
2293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
2294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
//
//  formatOffsets   Render a list of break offsets as text for error messages.
//                  Each offset is written as one space followed by its decimal
//                  value, e.g. " 5 6 10".
//
//      buffer      destination; NUL-terminated on return whenever buflen > 0,
//                  including when count is zero (empty string).
//      buflen      capacity of buffer in bytes, including the terminating NUL.
//      count       number of entries in offsets.
//      offsets     the values to format.
//
//  Output never runs past buflen. If the next offset does not fit completely,
//  formatting stops there, so the result never ends in a partial number.
//
static void formatOffsets(char* buffer, int32_t buflen, int32_t count, const int32_t* offsets) {
    if (buffer == NULL || buflen <= 0) {
        return;
    }
    *buffer = 0;    // an empty list must still produce a valid (empty) string
    while (count-- > 0) {
        // snprintf's return value replaces the former "%n" bookkeeping.
        int written = snprintf(buffer, (size_t)buflen, " %d", (int)*offsets++);
        if (written < 0 || written >= buflen) {
            // Error or truncation: discard the partial item and stop.
            *buffer = 0;
            break;
        }
        buffer += written;
        buflen -= written;
    }
}
2303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum { kMaxOffsetCount = 128 };
2305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TBTest(BreakIterator* brkitr, int type, const char *locale, const char* escapedText, const int32_t *expectOffsets, int32_t expectOffsetsCount) {
2307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    brkitr->setText( CharsToUnicodeString(escapedText) );
2308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t foundOffsets[kMaxOffsetCount];
2309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t offset, foundOffsetsCount = 0;
2310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // do forwards iteration test
2311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while ( foundOffsetsCount < kMaxOffsetCount && (offset = brkitr->next()) != BreakIterator::DONE ) {
2312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        foundOffsets[foundOffsetsCount++] = offset;
2313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if ( foundOffsetsCount != expectOffsetsCount || memcmp(expectOffsets, foundOffsets, foundOffsetsCount*sizeof(foundOffsets[0])) != 0 ) {
2315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // log error for forwards test
2316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        char formatExpect[512], formatFound[512];
2317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        formatOffsets(formatExpect, sizeof(formatExpect), expectOffsetsCount, expectOffsets);
2318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        formatOffsets(formatFound, sizeof(formatFound), foundOffsetsCount, foundOffsets);
2319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("For type %d %-5s, text \"%.16s\"...; expect %d offsets:%s; found %d offsets fwd:%s\n",
2320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                type, locale, escapedText, expectOffsetsCount, formatExpect, foundOffsetsCount, formatFound);
2321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
2322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // do backwards iteration test
2323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        --foundOffsetsCount; // back off one from the end offset
2324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while ( foundOffsetsCount > 0 ) {
2325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            offset = brkitr->previous();
2326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if ( offset != foundOffsets[--foundOffsetsCount] ) {
2327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // log error for backwards test
2328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                char formatExpect[512];
2329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                formatOffsets(formatExpect, sizeof(formatExpect), expectOffsetsCount, expectOffsets);
2330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("For type %d %-5s, text \"%.16s\"...; expect %d offsets:%s; found rev offset %d where expect %d\n",
2331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        type, locale, escapedText, expectOffsetsCount, formatExpect, offset, foundOffsets[foundOffsetsCount]);
2332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestTailoredBreaks() {
2339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const TailoredBreakItem * tbItemPtr;
2340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Locale rootLocale = Locale("root");
2341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (tbItemPtr = tbItems; tbItemPtr->escapedText != NULL; ++tbItemPtr) {
2342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        Locale testLocale = Locale(tbItemPtr->locale);
2343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        BreakIterator * tailoredBrkiter = NULL;
2344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        BreakIterator * rootBrkiter = NULL;
2345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
2346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        switch (tbItemPtr->type) {
2347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case UBRK_CHARACTER:
2348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tailoredBrkiter = BreakIterator::createCharacterInstance(testLocale, status);
2349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                rootBrkiter = BreakIterator::createCharacterInstance(rootLocale, status);
2350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case UBRK_WORD:
2352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tailoredBrkiter = BreakIterator::createWordInstance(testLocale, status);
2353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                rootBrkiter = BreakIterator::createWordInstance(rootLocale, status);
2354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case UBRK_LINE:
2356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tailoredBrkiter = BreakIterator::createLineInstance(testLocale, status);
2357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                rootBrkiter = BreakIterator::createLineInstance(rootLocale, status);
2358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case UBRK_SENTENCE:
2360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tailoredBrkiter = BreakIterator::createSentenceInstance(testLocale, status);
2361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                rootBrkiter = BreakIterator::createSentenceInstance(rootLocale, status);
2362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            default:
2364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                status = U_UNSUPPORTED_ERROR;
2365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(status)) {
2368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errcheckln(status, "BreakIterator create failed for type %d, locales root or %s - Error: %s", (int)(tbItemPtr->type), tbItemPtr->locale, u_errorName(status));
2369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
2370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TBTest(tailoredBrkiter, (int)(tbItemPtr->type), tbItemPtr->locale, tbItemPtr->escapedText, tbItemPtr->tailoredOffsets, tbItemPtr->tailoredOffsetsCount);
2372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TBTest(rootBrkiter,     (int)(tbItemPtr->type), "root",            tbItemPtr->escapedText, tbItemPtr->rootOffsets,     tbItemPtr->rootOffsetsCount);
2373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete rootBrkiter;
2375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete tailoredBrkiter;
2376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
2381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  TestDictRules   create a break iterator from source rules that includes a
2383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                  dictionary range.   Regression for bug #7130.  Source rules
2384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                  do not declare a break iterator type (word, line, sentence, etc.
2385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                  but the dictionary code, without a type, would loop.
2386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
2388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestDictRules() {
2389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *rules =  "$dictionary = [a-z]; \n"
2390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         "!!forward; \n"
2391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         "$dictionary $dictionary; \n"
2392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         "!!reverse; \n"
2393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         "$dictionary $dictionary; \n";
2394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *text = "aa";
2395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
2396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UParseError parseError;
2397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator bi(rules, parseError, status);
2399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_SUCCESS(status)) {
2400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString utext = text;
2401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        bi.setText(utext);
2402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t position;
2403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t loops;
2404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (loops = 0; loops<10; loops++) {
2405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            position = bi.next();
2406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (position == RuleBasedBreakIterator::DONE) {
2407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(loops == 1);
2411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
2412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln("Error creating RuleBasedBreakIterator: %s", u_errorName(status));
2413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
2419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    ReadAndConvertFile   Read a text data file, convert it to UChars, and
//    return the data in one big UChar * buffer, which the caller must delete.
2422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    parameters:
2424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//          fileName:   the name of the file, with no directory part.  The test data directory
2425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                      is assumed.
2426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//          ulen        an out parameter, receives the actual length (in UChars) of the file data.
2427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//          encoding    The file encoding.  If the file contains a BOM, that will override the encoding
2428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                      specified here.  The BOM, if it exists, will be stripped from the returned data.
2429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                      Pass NULL for the system default encoding.
2430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//          status
2431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    returns:
2432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                      The file data, converted to UChar.
2433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                      The caller must delete this when done with
2434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                           delete [] theBuffer;
2435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    TODO:  This is a clone of RegexTest::ReadAndConvertFile.
2437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//           Move this function to some common place.
2438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------
2440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar *RBBITest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) {
2441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar       *retPtr  = NULL;
2442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char        *fileBuf = NULL;
2443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UConverter* conv     = NULL;
2444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    FILE        *f       = NULL;
2445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ulen = 0;
2447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
2448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return retPtr;
2449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Open the file.
2453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    f = fopen(fileName, "rb");
2455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (f == 0) {
2456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln("Error opening test data file %s\n", fileName);
2457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_FILE_ACCESS_ERROR;
2458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
2459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Read it in
2462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int   fileSize;
2464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int   amt_read;
2465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fseek( f, 0, SEEK_END);
2467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fileSize = ftell(f);
2468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fileBuf = new char[fileSize];
2469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fseek(f, 0, SEEK_SET);
2470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    amt_read = fread(fileBuf, 1, fileSize, f);
2471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (amt_read != fileSize || fileSize <= 0) {
2472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Error reading test data file.");
2473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanUpAndReturn;
2474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Look for a Unicode Signature (BOM) on the data just read
2478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t        signatureLength;
2480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *   fileBufC;
2481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char*    bomEncoding;
2482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fileBufC = fileBuf;
2484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bomEncoding = ucnv_detectUnicodeSignature(
2485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fileBuf, fileSize, &signatureLength, &status);
2486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(bomEncoding!=NULL ){
2487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fileBufC  += signatureLength;
2488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fileSize  -= signatureLength;
2489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        encoding = bomEncoding;
2490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Open a converter to take the rule file to UTF-16
2494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    conv = ucnv_open(encoding, &status);
2496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
2497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        goto cleanUpAndReturn;
2498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Convert the rules to UChar.
2502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Preflight first to determine required buffer size.
2503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ulen = ucnv_toUChars(conv,
2505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        NULL,           //  dest,
2506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0,              //  destCapacity,
2507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fileBufC,
2508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fileSize,
2509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        &status);
2510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (status == U_BUFFER_OVERFLOW_ERROR) {
2511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Buffer Overflow is expected from the preflight operation.
2512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_ZERO_ERROR;
2513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        retPtr = new UChar[ulen+1];
2515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ucnv_toUChars(conv,
2516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            retPtr,       //  dest,
2517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ulen+1,
2518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fileBufC,
2519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fileSize,
2520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            &status);
2521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)cleanUpAndReturn:
2524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fclose(f);
2525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete []fileBuf;
2526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucnv_close(conv);
2527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
2528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
2529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete retPtr;
2530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        retPtr = 0;
2531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ulen   = 0;
2532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
2533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return retPtr;
2534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------------------
2539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   Run tests from each of the boundary test data files distributed by the Unicode Consortium
2541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------------------
2543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestUnicodeFiles() {
2544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator  *bi;
2545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode               status = U_ZERO_ERROR;
2546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi =  (RuleBasedBreakIterator *)BreakIterator::createCharacterInstance(Locale::getEnglish(), status);
2548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT_SUCCESS(status);
2549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_SUCCESS(status)) {
2550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        runUnicodeTestData("GraphemeBreakTest.txt", bi);
2551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
2553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi =  (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
2555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT_SUCCESS(status);
2556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_SUCCESS(status)) {
2557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        runUnicodeTestData("WordBreakTest.txt", bi);
2558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
2560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi =  (RuleBasedBreakIterator *)BreakIterator::createSentenceInstance(Locale::getEnglish(), status);
2562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT_SUCCESS(status);
2563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_SUCCESS(status)) {
2564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        runUnicodeTestData("SentenceBreakTest.txt", bi);
2565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
2567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi =  (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
2569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT_SUCCESS(status);
2570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_SUCCESS(status)) {
2571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        runUnicodeTestData("LineBreakTest.txt", bi);
2572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
2574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------------------
2578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   Run tests from one of the boundary test data files distributed by the Unicode Consortium
2580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------------------
2582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi) {
2583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS
2584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TODO(andy): Match line break behavior to Unicode 6.0 and remove this time bomb.
2585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UVersionInfo icu4601 = { 4, 6, 0, 1 };
2586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool isICUVersionPast46 = isICUVersionAtLeast(icu4601);
2587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool isLineBreak = 0 == strcmp(fileName, "LineBreakTest.txt");
2588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
2589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Open and read the test data file, put it into a UnicodeString.
2592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *testDataDirectory = IntlTest::getSourceTestData(status);
2594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char testFileName[1000];
2595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) {
2596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln("Can't open test data.  Path too long.");
2597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
2598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    strcpy(testFileName, testDataDirectory);
2600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    strcat(testFileName, fileName);
2601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("Opening data file %s\n", fileName);
2603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int    len;
2605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status);
2606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (status != U_FILE_ACCESS_ERROR) {
2607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
2608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(testFile != NULL);
2609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status) || testFile == NULL) {
2611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return; /* something went wrong, error already output */
2612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString testFileAsString(TRUE, testFile, len);
2614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Parse the test data file using a regular expression.
2617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Each kind of token is recognized in its own capture group; what type of item was scanned
2618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //     is identified by which group had a match.
2619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //    Caputure Group #                  1          2            3            4           5
2621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //    Parses this item:               divide       x      hex digits   comment \n  unrecognized \n
2622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString tokenExpr("[ \t]*(?:(\\u00F7)|(\\u00D7)|([0-9a-fA-F]+)|((?:#.*?)?$.)|(.*?$.))", -1, US_INV);
2624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher    tokenMatcher(tokenExpr, testFileAsString, UREGEX_MULTILINE | UREGEX_DOTALL, status);
2625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString   testString;
2626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector32       breakPositions(status);
2627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int             lineNumber = 1;
2628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT_SUCCESS(status);
2629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
2630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
2631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Scan through each test case, building up the string to be broken in testString,
2635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   and the positions that should be boundaries in the breakPositions vector.
2636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
2637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int spin = 0;
2638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (tokenMatcher.find()) {
2639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      	if(tokenMatcher.hitEnd()) {
2640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          /* Shouldnt Happen(TM).  This means we didn't find the symbols we were looking for.
2641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             This occurred when the text file was corrupt (wasn't marked as UTF-8)
2642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             and caused an infinite loop here on EBCDIC systems!
2643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          */
2644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          fprintf(stderr,"FAIL: hit end of file %s for the %8dth time- corrupt data file?\r", fileName, ++spin);
2645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          //	   return;
2646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      	}
2647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (tokenMatcher.start(1, status) >= 0) {
2648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Scanned a divide sign, indicating a break position in the test data.
2649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testString.length()>0) {
2650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                breakPositions.addElement(testString.length(), status);
2651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else if (tokenMatcher.start(2, status) >= 0) {
2654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Scanned an 'x', meaning no break at this position in the test data
2655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   Nothing to be done here.
2656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else if (tokenMatcher.start(3, status) >= 0) {
2658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Scanned Hex digits.  Convert them to binary, append to the character data string.
2659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const UnicodeString &hexNumber = tokenMatcher.group(3, status);
2660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int length = hexNumber.length();
2661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (length<=8) {
2662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                char buf[10];
2663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                hexNumber.extract (0, length, buf, sizeof(buf), US_INV);
2664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UChar32 c = (UChar32)strtol(buf, NULL, 16);
2665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (c<=0x10ffff) {
2666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    testString.append(c);
2667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                } else {
2668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    errln("Error: Unicode Character value out of range. \'%s\', line %d.\n",
2669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       fileName, lineNumber);
2670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
2672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("Syntax Error: Hex Unicode Character value must have no more than 8 digits at \'%s\', line %d.\n",
2673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       fileName, lineNumber);
2674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             }
2675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else if (tokenMatcher.start(4, status) >= 0) {
2677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Scanned to end of a line, possibly skipping over a comment in the process.
2678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   If the line from the file contained test data, run the test now.
2679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //
2680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (testString.length() > 0) {
2681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TODO(andy): Remove this time bomb code.
2682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)if (!isLineBreak || isICUVersionPast46 || !(4658 <= lineNumber && lineNumber <= 4758)) {
2683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                checkUnicodeTestCase(fileName, lineNumber, testString, &breakPositions, bi);
2684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Clear out this test case.
2688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //    The string and breakPositions vector will be refilled as the next
2689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //       test case is parsed.
2690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testString.remove();
2691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            breakPositions.removeAllElements();
2692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lineNumber++;
2693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
2694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Scanner catchall.  Something unrecognized appeared on the line.
2695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            char token[16];
2696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UnicodeString uToken = tokenMatcher.group(0, status);
2697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            uToken.extract(0, uToken.length(), token, (uint32_t)sizeof(token));
2698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            token[sizeof(token)-1] = 0;
2699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Syntax error in test data file \'%s\', line %d.  Scanning \"%s\"\n", fileName, lineNumber, token);
2700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Clean up, in preparation for continuing with the next line.
2702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testString.remove();
2703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            breakPositions.removeAllElements();
2704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lineNumber++;
2705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
2707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(status)) {
2708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
2709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete [] testFile;
2713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
2714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------------------
2717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   checkUnicodeTestCase()   Run one test case from one of the Unicode Consortium
2719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                            test data files.  Do only a simple, forward-only check -
2720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                            this test is mostly to check that ICU and the Unicode
2721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                            data agree with each other.
2722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------------------
2724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::checkUnicodeTestCase(const char *testFileName, int lineNumber,
2725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         const UnicodeString &testString,   // Text data to be broken
2726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         UVector32 *breakPositions,         // Positions where breaks should be found.
2727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         RuleBasedBreakIterator *bi) {
2728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t pos;                 // Break Position in the test string
2729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t expectedI = 0;       // Index of expected break position in the vector of expected results.
2730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t expectedPos;         // Expected break position (index into test string)
2731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi->setText(testString);
2733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pos = bi->first();
2734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pos = bi->next();
2735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (pos != BreakIterator::DONE) {
2737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (expectedI >= breakPositions->size()) {
2738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Test file \"%s\", line %d, unexpected break found at position %d",
2739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                testFileName, lineNumber, pos);
2740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
2741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectedPos = breakPositions->elementAti(expectedI);
2743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (pos < expectedPos) {
2744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Test file \"%s\", line %d, unexpected break found at position %d",
2745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                testFileName, lineNumber, pos);
2746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
2747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (pos > expectedPos) {
2749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Test file \"%s\", line %d, failed to find expected break at position %d",
2750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                testFileName, lineNumber, expectedPos);
2751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
2752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        pos = bi->next();
2754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectedI++;
2755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (pos==BreakIterator::DONE && expectedI<breakPositions->size()) {
2758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("Test file \"%s\", line %d, failed to find expected break at position %d",
2759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testFileName, lineNumber, breakPositions->elementAti(expectedI));
2760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS
2766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------------------
2767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   classs RBBIMonkeyKind
2769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//      Monkey Test for Break Iteration
2771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//      Abstract interface class.   Concrete derived classes independently
2772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//      implement the break rules for different iterator types.
2773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//      The Monkey Test itself uses doesn't know which type of break iterator it is
2775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//      testing, but works purely in terms of the interface defined here.
2776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------------------
2778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class RBBIMonkeyKind {
2779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
2780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Return a UVector of UnicodeSets, representing the character classes used
2781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   for this type of iterator.
2782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  UVector  *charClasses() = 0;
2783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Set the test text on which subsequent calls to next() will operate
2785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  void      setText(const UnicodeString &s) = 0;
2786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Find the next break postion, starting from the prev break position, or from zero.
2788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Return -1 after reaching end of string.
2789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  int32_t   next(int32_t i) = 0;
2790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual ~RBBIMonkeyKind();
2792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode       deferredStatus;
2793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)protected:
2796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBIMonkeyKind();
2797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
2799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
2800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBIMonkeyKind::RBBIMonkeyKind() {
2802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    deferredStatus = U_ZERO_ERROR;
2803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBIMonkeyKind::~RBBIMonkeyKind() {
2806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------------------
2810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   Random Numbers.  Similar to standard lib rand() and srand()
2812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                    Not using library to
2813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                      1.  Get same results on all platforms.
2814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                      2.  Get access to current seed, to more easily reproduce failures.
2815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------------------
// Current generator state.  Kept at file scope so the seed in effect can be
// read back (or set) when a failure needs to be reproduced.
static uint32_t m_seed = 1;

// Advance the linear congruential generator by one step and return the next
// pseudo-random value.  The result is bits 16..30 of the updated state, i.e.
// a number in the range 0..32767.
static uint32_t m_rand()
{
    const uint32_t multiplier = 1103515245;
    const uint32_t increment  = 12345;

    // Unsigned arithmetic: the multiply/add wraps modulo 2^32 by definition.
    m_seed = m_seed * multiplier + increment;
    return (m_seed >> 16) & 0x7FFF;
}
2824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------
2827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   class RBBICharMonkey      Character (Grapheme Cluster) specific implementation
2829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                             of RBBIMonkeyKind.
2830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
2831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------
2832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class RBBICharMonkey: public RBBIMonkeyKind {
2833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
2834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBICharMonkey();
2835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual          ~RBBICharMonkey();
2836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  UVector *charClasses();
2837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  void     setText(const UnicodeString &s);
2838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  int32_t  next(int32_t i);
2839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
2840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector   *fSets;
2841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fCRLFSet;
2843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fControlSet;
2844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fExtendSet;
2845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fPrependSet;
2846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fSpacingSet;
2847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fLSet;
2848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fVSet;
2849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fTSet;
2850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fLVSet;
2851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fLVTSet;
2852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fHangulSet;
2853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fAnySet;
2854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *fText;
2856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
2857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBICharMonkey::RBBICharMonkey() {
2860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
2861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fText = NULL;
2863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCRLFSet    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\r\\n]"), status);
2865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fControlSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Control}]"), status);
2866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fExtendSet  = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Extend}]"), status);
2867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fPrependSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Prepend}]"), status);
2868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSpacingSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = SpacingMark}]"), status);
2869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = L}]"), status);
2870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fVSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = V}]"), status);
2871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fTSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = T}]"), status);
2872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLVSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LV}]"), status);
2873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLVTSet     = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LVT}]"), status);
2874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fHangulSet  = new UnicodeSet();
2875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fHangulSet->addAll(*fLSet);
2876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fHangulSet->addAll(*fVSet);
2877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fHangulSet->addAll(*fTSet);
2878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fHangulSet->addAll(*fLVSet);
2879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fHangulSet->addAll(*fLVTSet);
2880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fAnySet     = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u0000-\\U0010ffff]"), status);
2881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets       = new UVector(status);
2883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fCRLFSet,    status);
2884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fControlSet, status);
2885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fExtendSet,  status);
2886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fPrependSet, status);
2887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fSpacingSet, status);
2888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fHangulSet,  status);
2889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fAnySet,     status);
2890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
2891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        deferredStatus = status;
2892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBICharMonkey::setText(const UnicodeString &s) {
2897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fText = &s;
2898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RBBICharMonkey::next(int32_t prevPos) {
2903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int    p0, p1, p2, p3;    // Indices of the significant code points around the
2904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              //   break position being tested.  The candidate break
2905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              //   location is before p2.
2906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int     breakPos = -1;
2908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
2910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(deferredStatus)) {
2912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
2913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Previous break at end of string.  return DONE.
2916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (prevPos >= fText->length()) {
2917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
2918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    p0 = p1 = p2 = p3 = prevPos;
2920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c3 =  fText->char32At(prevPos);
2921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c0 = c1 = c2 = 0;
2922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Loop runs once per "significant" character position in the input text.
2924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (;;) {
2925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Move all of the positions forward in the input string.
2926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p0 = p1;  c0 = c1;
2927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p1 = p2;  c1 = c2;
2928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p2 = p3;  c2 = c3;
2929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Advancd p3 by one codepoint
2931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p3 = fText->moveIndex32(p3, 1);
2932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        c3 = fText->char32At(p3);
2933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p1 == p2) {
2935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Still warming up the loop.  (won't work with zero length strings, but we don't care)
2936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
2937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p2 == fText->length()) {
2939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Reached end of string.  Always a break position.
2940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
2941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule  GB3   CR x LF
2944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //     No Extend or Format characters may appear between the CR and LF,
2945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //     which requires the additional check for p2 immediately following p1.
2946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
2947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
2948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
2949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (GB4).   ( Control | CR | LF ) <break>
2952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fControlSet->contains(c1) ||
2953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c1 == 0x0D ||
2954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c1 == 0x0A)  {
2955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
2956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (GB5)    <break>  ( Control | CR | LF )
2959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
2960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fControlSet->contains(c2) ||
2961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c2 == 0x0D ||
2962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c2 == 0x0A)  {
2963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
2964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (GB6)  L x ( L | V | LV | LVT )
2968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fLSet->contains(c1) &&
2969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               (fLSet->contains(c2)  ||
2970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fVSet->contains(c2)  ||
2971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fLVSet->contains(c2) ||
2972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fLVTSet->contains(c2))) {
2973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
2974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (GB7)    ( LV | V )  x  ( V | T )
2977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fLVSet->contains(c1) || fVSet->contains(c1)) &&
2978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fVSet->contains(c2) || fTSet->contains(c2)))  {
2979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
2980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (GB8)    ( LVT | T)  x T
2983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fLVTSet->contains(c1) || fTSet->contains(c1)) &&
2984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fTSet->contains(c2))  {
2985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
2986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (GB9)    Numeric x ALetter
2989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fExtendSet->contains(c2))  {
2990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
2991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (GB9a)   x  SpacingMark
2994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fSpacingSet->contains(c2)) {
2995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
2996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (GB9b)   Prepend x
2999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fPrependSet->contains(c1)) {
3000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (GB10)  Any  <break>  Any
3004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        break;
3005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breakPos = p2;
3008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return breakPos;
3009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UVector  *RBBICharMonkey::charClasses() {
3014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fSets;
3015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBICharMonkey::~RBBICharMonkey() {
3019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSets;
3020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fCRLFSet;
3021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fControlSet;
3022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fExtendSet;
3023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fPrependSet;
3024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSpacingSet;
3025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fLSet;
3026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fVSet;
3027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fTSet;
3028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fLVSet;
3029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fLVTSet;
3030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fHangulSet;
3031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fAnySet;
3032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------
3035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
3036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   class RBBIWordMonkey      Word Break specific implementation
3037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                             of RBBIMonkeyKind.
3038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
3039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------
3040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class RBBIWordMonkey: public RBBIMonkeyKind {
3041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
3042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBIWordMonkey();
3043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual          ~RBBIWordMonkey();
3044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  UVector *charClasses();
3045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  void     setText(const UnicodeString &s);
3046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual int32_t   next(int32_t i);
3047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
3048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector      *fSets;
3049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fCRSet;
3051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fLFSet;
3052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fNewlineSet;
3053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fKatakanaSet;
3054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fALetterSet;
3055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // TODO(jungshik): Do we still need this change?
3056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // UnicodeSet  *fALetterSet; // matches ALetterPlus in word.txt
3057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fMidNumLetSet;
3058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fMidLetterSet;
3059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fMidNumSet;
3060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fNumericSet;
3061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fFormatSet;
3062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fOtherSet;
3063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fExtendSet;
3064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fExtendNumLetSet;
3065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fDictionaryCjkSet;
3066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher  *fMatcher;
3068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString  *fText;
3070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
3071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBIWordMonkey::RBBIWordMonkey()
3074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
3075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
3076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets            = new UVector(status);
3078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCRSet           = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"),           status);
3080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLFSet           = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"),           status);
3081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNewlineSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"),      status);
3082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fDictionaryCjkSet= new UnicodeSet("[[\\uac00-\\ud7a3][:Han:][:Hiragana:]]", status);
3083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Exclude Hangul syllables from ALetterSet during testing.
3084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Leave CJK dictionary characters out from the monkey tests!
3085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0
3086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fALetterSet      = new UnicodeSet("[\\p{Word_Break = ALetter}"
3087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                      "[\\p{Line_Break = Complex_Context}"
3088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                      "-\\p{Grapheme_Cluster_Break = Extend}"
3089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                      "-\\p{Grapheme_Cluster_Break = Control}"
3090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                      "]]",
3091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                      status);
3092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
3093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fALetterSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ALetter}]"), status);
3094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fALetterSet->removeAll(*fDictionaryCjkSet);
3095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fKatakanaSet     = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Katakana}]"),     status);
3096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fMidNumLetSet    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNumLet}]"),    status);
3097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fMidLetterSet    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidLetter}]"),    status);
3098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fMidNumSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNum}]"),       status);
3099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNumericSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Numeric}[\\uff10-\\uff19]]"),      status);
3100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fFormatSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Format}]"),       status);
3101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fExtendNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ExtendNumLet}]"), status);
3102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fExtendSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"),       status);
3103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet        = new UnicodeSet();
3105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(status)) {
3106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      deferredStatus = status;
3107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      return;
3108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->complement();
3111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fCRSet);
3112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fLFSet);
3113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fNewlineSet);
3114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fKatakanaSet);
3115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fALetterSet);
3116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fMidLetterSet);
3117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fMidNumSet);
3118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fNumericSet);
3119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fExtendNumLetSet);
3120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fFormatSet);
3121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fExtendSet);
3122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Inhibit dictionary characters from being tested at all.
3123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fDictionaryCjkSet);
3124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{LineBreak = Complex_Context}]"), status));
3125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fCRSet,        status);
3127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fLFSet,        status);
3128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fNewlineSet,   status);
3129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fALetterSet,   status);
3130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //fSets->addElement(fKatakanaSet,  status); //TODO: work out how to test katakana
3131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fMidLetterSet, status);
3132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fMidNumLetSet, status);
3133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fMidNumSet,    status);
3134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fNumericSet,   status);
3135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fFormatSet,    status);
3136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fExtendSet,    status);
3137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fOtherSet,     status);
3138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fExtendNumLetSet, status);
3139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
3141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        deferredStatus = status;
3142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBIWordMonkey::setText(const UnicodeString &s) {
3146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fText       = &s;
3147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RBBIWordMonkey::next(int32_t prevPos) {
3151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int    p0, p1, p2, p3;    // Indices of the significant code points around the
3152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              //   break position being tested.  The candidate break
3153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              //   location is before p2.
3154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int     breakPos = -1;
3156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
3158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(deferredStatus)) {
3160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
3161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Prev break at end of string.  return DONE.
3164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (prevPos >= fText->length()) {
3165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
3166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    p0 = p1 = p2 = p3 = prevPos;
3168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c3 =  fText->char32At(prevPos);
3169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c0 = c1 = c2 = 0;
3170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Loop runs once per "significant" character position in the input text.
3172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (;;) {
3173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Move all of the positions forward in the input string.
3174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p0 = p1;  c0 = c1;
3175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p1 = p2;  c1 = c2;
3176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p2 = p3;  c2 = c3;
3177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Advancd p3 by    X(Extend | Format)*   Rule 4
3179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    But do not advance over Extend & Format following a new line. (Unicode 5.1 change)
3180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        do {
3181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p3 = fText->moveIndex32(p3, 1);
3182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c3 = fText->char32At(p3);
3183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fCRSet->contains(c2) || fLFSet->contains(c2) || fNewlineSet->contains(c2)) {
3184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               break;
3185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            };
3186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (fFormatSet->contains(c3) || fExtendSet->contains(c3));
3188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p1 == p2) {
3191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Still warming up the loop.  (won't work with zero length strings, but we don't care)
3192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p2 == fText->length()) {
3195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Reached end of string.  Always a break position.
3196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
3197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule  (3)   CR x LF
3200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //     No Extend or Format characters may appear between the CR and LF,
3201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //     which requires the additional check for p2 immediately following p1.
3202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
3203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (c1==0x0D && c2==0x0A) {
3204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (3a)  Break before and after newlines (including CR and LF)
3208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
3209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fCRSet->contains(c1) || fLFSet->contains(c1) || fNewlineSet->contains(c1)) {
3210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
3211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        };
3212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fCRSet->contains(c2) || fLFSet->contains(c2) || fNewlineSet->contains(c2)) {
3213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
3214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        };
3215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (5).   ALetter x ALetter
3217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fALetterSet->contains(c1) &&
3218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fALetterSet->contains(c2))  {
3219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (6)  ALetter  x  (MidLetter | MidNumLet) ALetter
3223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
3224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ( fALetterSet->contains(c1)   &&
3225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             (fMidLetterSet->contains(c2) || fMidNumLetSet->contains(c2)) &&
3226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             fALetterSet->contains(c3)) {
3227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (7)  ALetter (MidLetter | MidNumLet)  x  ALetter
3232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fALetterSet->contains(c0) &&
3233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fMidLetterSet->contains(c1) ||  fMidNumLetSet->contains(c1)) &&
3234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fALetterSet->contains(c2)) {
3235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (8)    Numeric x Numeric
3239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fNumericSet->contains(c1) &&
3240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fNumericSet->contains(c2))  {
3241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (9)    ALetter x Numeric
3245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fALetterSet->contains(c1) &&
3246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fNumericSet->contains(c2))  {
3247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (10)    Numeric x ALetter
3251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fNumericSet->contains(c1) &&
3252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fALetterSet->contains(c2))  {
3253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (11)   Numeric (MidNum | MidNumLet)  x  Numeric
3257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fNumericSet->contains(c0) &&
3258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fMidNumSet->contains(c1) || fMidNumLetSet->contains(c1))  &&
3259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fNumericSet->contains(c2)) {
3260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (12)  Numeric x (MidNum | MidNumLet) Numeric
3264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fNumericSet->contains(c1) &&
3265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fMidNumSet->contains(c2) || fMidNumLetSet->contains(c2))  &&
3266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fNumericSet->contains(c3)) {
3267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (13)  Katakana x Katakana
3271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fKatakanaSet->contains(c1) &&
3272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fKatakanaSet->contains(c2))  {
3273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule 13a
3277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fALetterSet->contains(c1) || fNumericSet->contains(c1) ||
3278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             fKatakanaSet->contains(c1) || fExtendNumLetSet->contains(c1)) &&
3279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             fExtendNumLetSet->contains(c2)) {
3280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
3281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             }
3282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule 13b
3284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fExtendNumLetSet->contains(c1) &&
3285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                (fALetterSet->contains(c2) || fNumericSet->contains(c2) ||
3286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fKatakanaSet->contains(c2)))  {
3287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
3288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             }
3289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule 14.  Break found here.
3291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        break;
3292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breakPos = p2;
3295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return breakPos;
3296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UVector  *RBBIWordMonkey::charClasses() {
3300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fSets;
3301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBIWordMonkey::~RBBIWordMonkey() {
3305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSets;
3306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fCRSet;
3307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fLFSet;
3308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fNewlineSet;
3309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fKatakanaSet;
3310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fALetterSet;
3311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fMidNumLetSet;
3312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fMidLetterSet;
3313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fMidNumSet;
3314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fNumericSet;
3315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fFormatSet;
3316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fExtendSet;
3317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fExtendNumLetSet;
3318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fOtherSet;
3319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------
3325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
3326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   class RBBISentMonkey      Sentence Break specific implementation
3327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                             of RBBIMonkeyKind.
3328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
3329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------
3330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class RBBISentMonkey: public RBBIMonkeyKind {
3331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
3332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBISentMonkey();
3333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual          ~RBBISentMonkey();
3334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  UVector *charClasses();
3335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  void     setText(const UnicodeString &s);
3336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual int32_t   next(int32_t i);
3337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
3338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int               moveBack(int posFrom);
3339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int               moveForward(int posFrom);
3340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32           cAt(int pos);
3341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector      *fSets;
3343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fSepSet;
3345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fFormatSet;
3346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fSpSet;
3347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fLowerSet;
3348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fUpperSet;
3349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fOLetterSet;
3350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fNumericSet;
3351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fATermSet;
3352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fSContinueSet;
3353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fSTermSet;
3354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fCloseSet;
3355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fOtherSet;
3356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fExtendSet;
3357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString  *fText;
3359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
3361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBISentMonkey::RBBISentMonkey()
3363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
3364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
3365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets            = new UVector(status);
3367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Separator Set Note:  Beginning with Unicode 5.1, CR and LF were removed from the separator
3369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //                       set and made into character classes of their own.  For the monkey impl,
3370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //                       they remain in SEP, since Sep always appears with CR and LF in the rules.
3371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSepSet          = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sep} \\u000a \\u000d]"),     status);
3372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fFormatSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Format}]"),    status);
3373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSpSet           = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sp}]"),        status);
3374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLowerSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Lower}]"),     status);
3375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fUpperSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Upper}]"),     status);
3376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOLetterSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = OLetter}]"),   status);
3377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNumericSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Numeric}]"),   status);
3378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fATermSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = ATerm}]"),     status);
3379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSContinueSet    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = SContinue}]"), status);
3380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSTermSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = STerm}]"),     status);
3381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCloseSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Close}]"),     status);
3382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fExtendSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Extend}]"),    status);
3383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet        = new UnicodeSet();
3384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(status)) {
3386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      deferredStatus = status;
3387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      return;
3388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->complement();
3391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fSepSet);
3392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fFormatSet);
3393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fSpSet);
3394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fLowerSet);
3395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fUpperSet);
3396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fOLetterSet);
3397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fNumericSet);
3398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fATermSet);
3399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fSContinueSet);
3400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fSTermSet);
3401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fCloseSet);
3402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOtherSet->removeAll(*fExtendSet);
3403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fSepSet,       status);
3405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fFormatSet,    status);
3406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fSpSet,        status);
3407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fLowerSet,     status);
3408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fUpperSet,     status);
3409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fOLetterSet,   status);
3410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fNumericSet,   status);
3411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fATermSet,     status);
3412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fSContinueSet, status);
3413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fSTermSet,     status);
3414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fCloseSet,     status);
3415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fOtherSet,     status);
3416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fExtendSet,    status);
3417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
3419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        deferredStatus = status;
3420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBISentMonkey::setText(const UnicodeString &s) {
3426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fText       = &s;
3427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UVector  *RBBISentMonkey::charClasses() {
3430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fSets;
3431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  moveBack()   Find the "significant" code point preceding the index i.
3435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//               Skips over ($Extend | $Format)* .
3436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
3437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int RBBISentMonkey::moveBack(int i) {
3438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (i <= 0) {
3439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
3440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32   c;
3442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t   j = i;
3443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    do {
3444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        j = fText->moveIndex32(j, -1);
3445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        c = fText->char32At(j);
3446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (j>0 &&(fFormatSet->contains(c) || fExtendSet->contains(c)));
3448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return j;
3449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }
3451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int RBBISentMonkey::moveForward(int i) {
3454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (i>=fText->length()) {
3455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return fText->length();
3456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32   c;
3458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t   j = i;
3459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    do {
3460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        j = fText->moveIndex32(j, 1);
3461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        c = cAt(j);
3462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (fFormatSet->contains(c) || fExtendSet->contains(c));
3464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return j;
3465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 RBBISentMonkey::cAt(int pos) {
3468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (pos<0 || pos>=fText->length()) {
3469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
3470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
3471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return fText->char32At(pos);
3472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RBBISentMonkey::next(int32_t prevPos) {
3476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int    p0, p1, p2, p3;    // Indices of the significant code points around the
3477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              //   break position being tested.  The candidate break
3478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              //   location is before p2.
3479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int     breakPos = -1;
3481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
3483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 c;
3484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(deferredStatus)) {
3486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
3487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Prev break at end of string.  return DONE.
3490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (prevPos >= fText->length()) {
3491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
3492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    p0 = p1 = p2 = p3 = prevPos;
3494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c3 =  fText->char32At(prevPos);
3495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c0 = c1 = c2 = 0;
3496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Loop runs once per "significant" character position in the input text.
3498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (;;) {
3499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Move all of the positions forward in the input string.
3500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p0 = p1;  c0 = c1;
3501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p1 = p2;  c1 = c2;
3502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p2 = p3;  c2 = c3;
3503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Advancd p3 by    X(Extend | Format)*   Rule 4
3505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p3 = moveForward(p3);
3506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        c3 = cAt(p3);
3507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (3)  CR x LF
3509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (c1==0x0d && c2==0x0a && p2==(p1+1)) {
3510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (4).   Sep  <break>
3514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fSepSet->contains(c1)) {
3515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p2 = p1+1;   // Separators don't combine with Extend or Format.
3516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
3517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p2 >= fText->length()) {
3520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Reached end of string.  Always a break position.
3521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
3522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (p2 == prevPos) {
3525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Still warming up the loop.  (won't work with zero length strings, but we don't care)
3526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (6).   ATerm x Numeric
3530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fATermSet->contains(c1) &&  fNumericSet->contains(c2))  {
3531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (7).  Upper ATerm  x  Uppper
3535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fUpperSet->contains(c0) && fATermSet->contains(c1) && fUpperSet->contains(c2)) {
3536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (8)  ATerm Close* Sp*  x  (not (OLettter | Upper | Lower | Sep | STerm | ATerm))* Lower
3540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //           Note:  STerm | ATerm are added to the negated part of the expression by a
3541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //                  note to the Unicode 5.0 documents.
3542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int p8 = p1;
3543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (fSpSet->contains(cAt(p8))) {
3544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p8 = moveBack(p8);
3545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (fCloseSet->contains(cAt(p8))) {
3547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p8 = moveBack(p8);
3548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fATermSet->contains(cAt(p8))) {
3550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p8=p2;
3551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for (;;) {
3552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c = cAt(p8);
3553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (c==-1 || fOLetterSet->contains(c) || fUpperSet->contains(c) ||
3554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    fLowerSet->contains(c) || fSepSet->contains(c) ||
3555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    fATermSet->contains(c) || fSTermSet->contains(c))  {
3556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    break;
3557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
3558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                p8 = moveForward(p8);
3559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fLowerSet->contains(cAt(p8))) {
3561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
3562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule 8a   (STerm | ATerm) Close* Sp* x (SContinue | STerm | ATerm);
3566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fSContinueSet->contains(c2) || fSTermSet->contains(c2) || fATermSet->contains(c2)) {
3567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p8 = p1;
3568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (fSpSet->contains(cAt(p8))) {
3569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                p8 = moveBack(p8);
3570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (fCloseSet->contains(cAt(p8))) {
3572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                p8 = moveBack(p8);
3573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c = cAt(p8);
3575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fSTermSet->contains(c) || fATermSet->contains(c)) {
3576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
3577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (9)  (STerm | ATerm) Close*  x  (Close | Sp | Sep | CR | LF)
3581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int p9 = p1;
3582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (fCloseSet->contains(cAt(p9))) {
3583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p9 = moveBack(p9);
3584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        c = cAt(p9);
3586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fSTermSet->contains(c) || fATermSet->contains(c))) {
3587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fCloseSet->contains(c2) || fSpSet->contains(c2) || fSepSet->contains(c2)) {
3588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
3589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (10)  (Sterm | ATerm) Close* Sp*  x  (Sp | Sep | CR | LF)
3593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int p10 = p1;
3594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (fSpSet->contains(cAt(p10))) {
3595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p10 = moveBack(p10);
3596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (fCloseSet->contains(cAt(p10))) {
3598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p10 = moveBack(p10);
3599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fSTermSet->contains(cAt(p10)) || fATermSet->contains(cAt(p10))) {
3601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fSpSet->contains(c2) || fSepSet->contains(c2)) {
3602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
3603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule (11)  (STerm | ATerm) Close* Sp* (Sep | CR | LF)?  <break>
3607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int p11 = p1;
3608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fSepSet->contains(cAt(p11))) {
3609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p11 = moveBack(p11);
3610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (fSpSet->contains(cAt(p11))) {
3612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p11 = moveBack(p11);
3613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (fCloseSet->contains(cAt(p11))) {
3615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p11 = moveBack(p11);
3616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fSTermSet->contains(cAt(p11)) || fATermSet->contains(cAt(p11))) {
3618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
3619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //  Rule (12)  Any x Any
3622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        continue;
3623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breakPos = p2;
3625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return breakPos;
3626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBISentMonkey::~RBBISentMonkey() {
3629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSets;
3630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSepSet;
3631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fFormatSet;
3632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSpSet;
3633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fLowerSet;
3634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fUpperSet;
3635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fOLetterSet;
3636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fNumericSet;
3637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fATermSet;
3638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSContinueSet;
3639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSTermSet;
3640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fCloseSet;
3641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fOtherSet;
3642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fExtendSet;
3643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------------------
3648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
3649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  RBBILineMonkey
3650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
3651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------------------
3652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class RBBILineMonkey: public RBBIMonkeyKind {
3654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
3655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBILineMonkey();
3656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual          ~RBBILineMonkey();
3657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  UVector *charClasses();
3658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  void     setText(const UnicodeString &s);
3659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  int32_t  next(int32_t i);
3660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual  void     rule9Adjust(int32_t pos, UChar32 *posChar, int32_t *nextPos, UChar32 *nextChar);
3661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
3662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector      *fSets;
3663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fBK;
3665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fCR;
3666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fLF;
3667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fCM;
3668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fNL;
3669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fSG;
3670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fWJ;
3671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fZW;
3672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fGL;
3673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fCB;
3674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fSP;
3675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fB2;
3676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fBA;
3677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fBB;
3678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fHY;
3679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fH2;
3680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fH3;
3681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fCL;
3682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fCP;
3683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fEX;
3684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fIN;
3685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fJL;
3686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fJV;
3687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fJT;
3688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fNS;
3689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fOP;
3690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fQU;
3691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fIS;
3692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fNU;
3693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fPO;
3694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fPR;
3695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fSY;
3696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fAI;
3697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fAL;
3698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fID;
3699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fSA;
3700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet  *fXX;
3701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator  *fCharBI;
3703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString  *fText;
3705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t              *fOrigPositions;
3706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher         *fNumberMatcher;
3708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher         *fLB11Matcher;
3709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
3710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBILineMonkey::RBBILineMonkey()
3713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
3714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
3715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets  = new UVector(status);
3717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fBK    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_Break=BK}]"), status);
3719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCR    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CR}]"), status);
3720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLF    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=LF}]"), status);
3721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCM    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CM}]"), status);
3722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNL    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NL}]"), status);
3723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fWJ    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=WJ}]"), status);
3724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fZW    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ZW}]"), status);
3725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fGL    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=GL}]"), status);
3726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCB    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CB}]"), status);
3727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSP    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SP}]"), status);
3728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fB2    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=B2}]"), status);
3729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fBA    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BA}]"), status);
3730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fBB    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BB}]"), status);
3731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fHY    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=HY}]"), status);
3732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fH2    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H2}]"), status);
3733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fH3    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H3}]"), status);
3734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCL    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CL}]"), status);
3735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCP    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CP}]"), status);
3736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fEX    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=EX}]"), status);
3737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fIN    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IN}]"), status);
3738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fJL    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JL}]"), status);
3739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fJV    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JV}]"), status);
3740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fJT    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JT}]"), status);
3741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNS    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NS}]"), status);
3742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fOP    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=OP}]"), status);
3743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fQU    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=QU}]"), status);
3744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fIS    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IS}]"), status);
3745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNU    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NU}]"), status);
3746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fPO    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PO}]"), status);
3747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fPR    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PR}]"), status);
3748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSY    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SY}]"), status);
3749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fAI    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AI}]"), status);
3750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fAL    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AL}]"), status);
3751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fID    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ID}]"), status);
3752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSA    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SA}]"), status);
3753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSG    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\ud800-\\udfff]"), status);
3754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fXX    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=XX}]"), status);
3755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
3757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        deferredStatus = status;
3758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fCharBI = NULL;
3759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fNumberMatcher = NULL;
3760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
3761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fAL->addAll(*fXX);     // Default behavior for XX is identical to AL
3764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fAL->addAll(*fAI);     // Default behavior for AI is identical to AL
3765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fAL->addAll(*fSA);     // Default behavior for SA is XX, which defaults to AL
3766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fAL->addAll(*fSG);     // Default behavior for SG is identical to AL.
3767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fBK, status);
3769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fCR, status);
3770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fLF, status);
3771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fCM, status);
3772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fNL, status);
3773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fWJ, status);
3774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fZW, status);
3775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fGL, status);
3776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fCB, status);
3777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fSP, status);
3778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fB2, status);
3779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fBA, status);
3780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fBB, status);
3781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fHY, status);
3782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fH2, status);
3783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fH3, status);
3784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fCL, status);
3785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fCP, status);
3786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fEX, status);
3787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fIN, status);
3788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fJL, status);
3789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fJT, status);
3790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fJV, status);
3791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fNS, status);
3792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fOP, status);
3793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fQU, status);
3794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fIS, status);
3795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fNU, status);
3796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fPO, status);
3797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fPR, status);
3798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fSY, status);
3799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fAI, status);
3800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fAL, status);
3801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fID, status);
3802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fWJ, status);
3803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fSA, status);
3804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSets->addElement(fSG, status);
3805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *rules =
3807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?"
3808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?"
3809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "\\p{Line_Break=NU}\\p{Line_Break=CM}*"
3810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*"
3811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "((\\p{Line_Break=CL}|\\p{Line_Break=CP})\\p{Line_Break=CM}*)?"
3812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?";
3813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNumberMatcher = new RegexMatcher(
3815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString(rules, -1, US_INV), 0, status);
3816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCharBI = BreakIterator::createCharacterInstance(Locale::getEnglish(), status);
3818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
3820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        deferredStatus = status;
3821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBILineMonkey::setText(const UnicodeString &s) {
3826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fText       = &s;
3827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCharBI->setText(s);
3828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNumberMatcher->reset(s);
3829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
3832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  rule9Adjust
3833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     Line Break TR rules 9 and 10 implementation.
//     This deals with combining marks and other sequences
//     that must be treated as if they were something other than what they actually are.
3836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
3837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     This is factored out into a separate function because it must be applied twice for
3838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     each potential break, once to the chars before the position being checked, then
3839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     again to the text following the possible break.
3840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
3841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBILineMonkey::rule9Adjust(int32_t pos, UChar32 *posChar, int32_t *nextPos, UChar32 *nextChar) {
3842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (pos == -1) {
3843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Invalid initial position.  Happens during the warmup iteration of the
3844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //   main loop in next().
3845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
3846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t  nPos = *nextPos;
3849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // LB 9  Keep combining sequences together.
3851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  advance over any CM class chars.  Note that Line Break CM is different
3852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  from the normal Grapheme Extend property.
3853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!(fSP->contains(*posChar) || fBK->contains(*posChar) || *posChar==0x0d ||
3854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          *posChar==0x0a ||fNL->contains(*posChar) || fZW->contains(*posChar))) {
3855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (;;) {
3856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *nextChar = fText->char32At(nPos);
3857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (!fCM->contains(*nextChar)) {
3858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
3859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            nPos = fText->moveIndex32(nPos, 1);
3861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // LB 9 Treat X CM* as if it were x.
3866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //       No explicit action required.
3867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // LB 10  Treat any remaining combining mark as AL
3869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCM->contains(*posChar)) {
3870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *posChar = 0x41;   // thisChar = 'A';
3871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Push the updated nextPos and nextChar back to our caller.
3874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // This only makes a difference if posChar got bigger by consuming a
3875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // combining sequence.
3876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *nextPos  = nPos;
3877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *nextChar = fText->char32At(nPos);
3878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RBBILineMonkey::next(int32_t startPos) {
3883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
3884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    pos;       //  Index of the char following a potential break position
3885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32    thisChar;  //  Character at above position "pos"
3886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    prevPos;   //  Index of the char preceding a potential break position
3888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32    prevChar;  //  Character at above position.  Note that prevChar
3889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                          //   and thisChar may not be adjacent because combining
3890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                          //   characters between them will be ignored.
3891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    nextPos;   //  Index of the next character following pos.
3893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                          //     Usually skips over combining marks.
3894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    nextCPPos; //  Index of the code point following "pos."
3895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                          //     May point to a combining mark.
3896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t    tPos;      //  temp value.
3897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32    c;
3898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(deferredStatus)) {
3900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
3901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (startPos >= fText->length()) {
3904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
3905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Initial values for loop.  Loop will run the first time without finding breaks,
3909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //                           while the invalid values shift out and the "this" and
3910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //                           "prev" positions are filled in with good values.
3911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pos      = prevPos   = -1;    // Invalid value, serves as flag for initial loop iteration.
3912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    thisChar = prevChar  = 0;
3913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    nextPos  = nextCPPos = startPos;
3914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Loop runs once per position in the test text, until a break position
3917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  is found.
3918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (;;) {
3919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        prevPos   = pos;
3920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        prevChar  = thisChar;
3921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        pos       = nextPos;
3923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        thisChar  = fText->char32At(pos);
3924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nextCPPos = fText->moveIndex32(pos, 1);
3926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nextPos   = nextCPPos;
3927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule LB2 - Break at end of text.
3929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (pos >= fText->length()) {
3930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
3931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Rule LB 9 - adjust for combining sequences.
3934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //             We do this one out-of-order because the adjustment does not change anything
3935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //             that would match rules LB 3 - LB 6, but after the adjustment, LB 3-6 do need to
3936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //             be applied.
3937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        rule9Adjust(prevPos, &prevChar, &pos,     &thisChar);
3938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nextCPPos = nextPos = fText->moveIndex32(pos, 1);
3939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        c = fText->char32At(nextPos);
3940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        rule9Adjust(pos,     &thisChar, &nextPos, &c);
3941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // If the loop is still warming up - if we haven't shifted the initial
3943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //   -1 positions out of prevPos yet - loop back to advance the
3944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    position in the input without any further looking for breaks.
3945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (prevPos == -1) {
3946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 4  Always break after hard line breaks,
3950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fBK->contains(prevChar)) {
3951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
3952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 5  Break after CR, LF, NL, but not inside CR LF
3955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (prevChar == 0x0d && thisChar == 0x0a) {
3956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (prevChar == 0x0d ||
3959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            prevChar == 0x0a ||
3960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            prevChar == 0x85)  {
3961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
3962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 6  Don't break before hard line breaks
3965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (thisChar == 0x0d || thisChar == 0x0a || thisChar == 0x85 ||
3966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fBK->contains(thisChar)) {
3967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
3968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 7  Don't break before spaces or zero-width space.
3972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fSP->contains(thisChar)) {
3973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fZW->contains(thisChar)) {
3977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 8  Break after zero width space
3981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fZW->contains(prevChar)) {
3982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
3983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 9, 10  Already done, at top of loop.
3986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
3987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 11  Do not break before or after WORD JOINER and related characters.
3990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    x  WJ
3991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    WJ  x
3992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
3993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fWJ->contains(thisChar) || fWJ->contains(prevChar)) {
3994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
3995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 12
3998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    GL  x
3999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fGL->contains(prevChar)) {
4000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 12a
4004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    [^SP BA HY] x GL
4005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (!(fSP->contains(prevChar) ||
4006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              fBA->contains(prevChar) ||
4007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              fHY->contains(prevChar)     ) && fGL->contains(thisChar)) {
4008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 13  Don't break before closings.
4014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //        NU x CL,  NU x CP  and NU x IS are not matched here so that they will
4015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //        fall into LB 17 and the more general number regular expression.
4016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
4017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((!fNU->contains(prevChar) && fCL->contains(thisChar)) ||
4018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (!fNU->contains(prevChar) && fCP->contains(thisChar)) ||
4019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                         fEX->contains(thisChar)  ||
4020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (!fNU->contains(prevChar) && fIS->contains(thisChar)) ||
4021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (!fNU->contains(prevChar) && fSY->contains(thisChar)))    {
4022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 14 Don't break after OP SP*
4026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //       Scan backwards, checking for this sequence.
4027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //       The OP char could include combining marks, so we actually check for
4028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //           OP CM* SP*
4029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //       Another Twist: The Rule 67 fixes may have changed a SP CM
4030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //       sequence into a ID char, so before scanning back through spaces,
4031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //       verify that prevChar is indeed a space.  The prevChar variable
4032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //       may differ from fText[prevPos]
4033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        tPos = prevPos;
4034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fSP->contains(prevChar)) {
4035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (tPos > 0 && fSP->contains(fText->char32At(tPos))) {
4036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tPos=fText->moveIndex32(tPos, -1);
4037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (tPos > 0 && fCM->contains(fText->char32At(tPos))) {
4040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            tPos=fText->moveIndex32(tPos, -1);
4041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fOP->contains(fText->char32At(tPos))) {
4043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 15    QU SP* x OP
4048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fOP->contains(thisChar)) {
4049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Scan backwards from prevChar to see if it is preceded by QU CM* SP*
4050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int tPos = prevPos;
4051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (tPos>0 && fSP->contains(fText->char32At(tPos))) {
4052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tPos = fText->moveIndex32(tPos, -1);
4053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (tPos>0 && fCM->contains(fText->char32At(tPos))) {
4055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tPos = fText->moveIndex32(tPos, -1);
4056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fQU->contains(fText->char32At(tPos))) {
4058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
4059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 16   (CL | CP) SP* x NS
4065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    Scan backwards for SP* CM* (CL | CP)
4066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fNS->contains(thisChar)) {
4067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int tPos = prevPos;
4068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (tPos>0 && fSP->contains(fText->char32At(tPos))) {
4069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tPos = fText->moveIndex32(tPos, -1);
4070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (tPos>0 && fCM->contains(fText->char32At(tPos))) {
4072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tPos = fText->moveIndex32(tPos, -1);
4073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fCL->contains(fText->char32At(tPos)) || fCP->contains(fText->char32At(tPos))) {
4075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
4076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 17        B2 SP* x B2
4081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fB2->contains(thisChar)) {
4082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //  Scan backwards, checking for the B2 CM* SP* sequence.
4083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            tPos = prevPos;
4084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fSP->contains(prevChar)) {
4085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                while (tPos > 0 && fSP->contains(fText->char32At(tPos))) {
4086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    tPos=fText->moveIndex32(tPos, -1);
4087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
4088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (tPos > 0 && fCM->contains(fText->char32At(tPos))) {
4090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                tPos=fText->moveIndex32(tPos, -1);
4091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fB2->contains(fText->char32At(tPos))) {
4093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
4094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 18    break after space
4099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fSP->contains(prevChar)) {
4100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
4101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 19
4104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    x   QU
4105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    QU  x
4106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fQU->contains(thisChar) || fQU->contains(prevChar)) {
4107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 20  Break around a CB
4111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fCB->contains(thisChar) || fCB->contains(prevChar)) {
4112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
4113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 21
4116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fBA->contains(thisChar) ||
4117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fHY->contains(thisChar) ||
4118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fNS->contains(thisChar) ||
4119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fBB->contains(prevChar) )   {
4120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 22
4124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fAL->contains(prevChar) && fIN->contains(thisChar)) ||
4125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fID->contains(prevChar) && fIN->contains(thisChar)) ||
4126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fIN->contains(prevChar) && fIN->contains(thisChar)) ||
4127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fNU->contains(prevChar) && fIN->contains(thisChar)) )   {
4128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 23    ID x PO
4133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //          AL x NU
4134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //          NU x AL
4135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fID->contains(prevChar) && fPO->contains(thisChar)) ||
4136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fAL->contains(prevChar) && fNU->contains(thisChar)) ||
4137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fNU->contains(prevChar) && fAL->contains(thisChar)) )   {
4138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 24  Do not break between prefix and letters or ideographs.
4142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //        PR x ID
4143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //        PR x AL
4144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //        PO x AL
4145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fPR->contains(prevChar) && fID->contains(thisChar)) ||
4146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fPR->contains(prevChar) && fAL->contains(thisChar)) ||
4147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fPO->contains(prevChar) && fAL->contains(thisChar)) )   {
4148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 25    Numbers
4154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fNumberMatcher->lookingAt(prevPos, status)) {
4155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (U_FAILURE(status)) {
4156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
4157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Matched a number.  But could have been just a single digit, which would
4159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //    not represent a "no break here" between prevChar and thisChar
4160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t numEndIdx = fNumberMatcher->end(status);  // idx of first char following num
4161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (numEndIdx > pos) {
4162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Number match includes at least our two chars being checked
4163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (numEndIdx > nextPos) {
4164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // Number match includes additional chars.  Update pos and nextPos
4165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    //   so that next loop iteration will continue at the end of the number,
4166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    //   checking for breaks between last char in number & whatever follows.
4167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    pos = nextPos = numEndIdx;
4168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    do {
4169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        pos = fText->moveIndex32(pos, -1);
4170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        thisChar = fText->char32At(pos);
4171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    } while (fCM->contains(thisChar));
4172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
4173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
4174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 26 Do not break a Korean syllable.
4179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fJL->contains(prevChar) && (fJL->contains(thisChar) ||
4180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                        fJV->contains(thisChar) ||
4181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                        fH2->contains(thisChar) ||
4182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                        fH3->contains(thisChar))) {
4183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                            continue;
4184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                        }
4185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fJV->contains(prevChar) || fH2->contains(prevChar))  &&
4187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (fJV->contains(thisChar) || fJT->contains(thisChar))) {
4188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
4189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fJT->contains(prevChar) || fH3->contains(prevChar)) &&
4192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fJT->contains(thisChar)) {
4193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
4194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 27 Treat a Korean Syllable Block the same as ID.
4197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fJL->contains(prevChar) || fJV->contains(prevChar) ||
4198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fJT->contains(prevChar) || fH2->contains(prevChar) || fH3->contains(prevChar)) &&
4199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fIN->contains(thisChar)) {
4200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
4201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fJL->contains(prevChar) || fJV->contains(prevChar) ||
4203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fJT->contains(prevChar) || fH2->contains(prevChar) || fH3->contains(prevChar)) &&
4204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fPO->contains(thisChar)) {
4205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
4206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fPR->contains(prevChar) && (fJL->contains(thisChar) || fJV->contains(thisChar) ||
4208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fJT->contains(thisChar) || fH2->contains(thisChar) || fH3->contains(thisChar))) {
4209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
4210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 28  Do not break between alphabetics ("at").
4215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fAL->contains(prevChar) && fAL->contains(thisChar)) {
4216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 29  Do not break between numeric punctuation and alphabetics ("e.g.").
4220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fIS->contains(prevChar) && fAL->contains(thisChar)) {
4221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 30    Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation.
4225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //          (AL | NU) x OP
4226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //          CP x (AL | NU)
4227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((fAL->contains(prevChar) || fNU->contains(prevChar)) && fOP->contains(thisChar)) {
4228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fCP->contains(prevChar) && (fAL->contains(thisChar) || fNU->contains(thisChar))) {
4231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // LB 31    Break everywhere else
4235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        break;
4236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return pos;
4240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
4241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UVector  *RBBILineMonkey::charClasses() {
4244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fSets;
4245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
4246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBILineMonkey::~RBBILineMonkey() {
4249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSets;
4250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fBK;
4252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fCR;
4253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fLF;
4254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fCM;
4255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fNL;
4256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fWJ;
4257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fZW;
4258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fGL;
4259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fCB;
4260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSP;
4261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fB2;
4262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fBA;
4263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fBB;
4264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fHY;
4265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fH2;
4266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fH3;
4267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fCL;
4268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fCP;
4269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fEX;
4270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fIN;
4271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fJL;
4272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fJV;
4273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fJT;
4274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fNS;
4275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fOP;
4276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fQU;
4277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fIS;
4278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fNU;
4279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fPO;
4280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fPR;
4281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSY;
4282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fAI;
4283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fAL;
4284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fID;
4285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSA;
4286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSG;
4287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fXX;
4288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fCharBI;
4290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fNumberMatcher;
4291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
4292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------------------
4295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
4296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   TestMonkey
4297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
4298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     params
4299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//       seed=nnnnn        Random number starting seed.
4300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                         Setting the seed allows errors to be reproduced.
4301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//       loop=nnn          Looping count.  Controls running time.
4302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                         -1:  run forever.
4303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                          0 or greater:  run length.
4304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
4305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//       type = char | word | line | sent | title
4306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
4307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------------------
4308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t  getIntParam(UnicodeString name, UnicodeString &params, int32_t defaultVal) {
4310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t val = defaultVal;
4311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    name.append(" *= *(-?\\d+)");
4312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
4313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RegexMatcher m(name, params, 0, status);
4314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (m.find()) {
4315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // The param exists.  Convert the string to an int.
4316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        char valString[100];
4317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t paramLength = m.end(1, status) - m.start(1, status);
4318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (paramLength >= (int32_t)(sizeof(valString)-1)) {
4319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            paramLength = (int32_t)(sizeof(valString)-2);
4320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        params.extract(m.start(1, status), paramLength, valString, sizeof(valString));
4322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        val = strtol(valString,  NULL, 10);
4323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Delete this parameter from the params string.
4325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        m.reset();
4326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        params = m.replaceFirst("", status);
4327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    U_ASSERT(U_SUCCESS(status));
4329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return val;
4330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
4331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
4332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void testBreakBoundPreceding(RBBITest *test, UnicodeString ustr,
4334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                    BreakIterator *bi,
4335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                    int expected[],
4336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                    int expectedcount)
4337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
4338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int count = 0;
4339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int i = 0;
4340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int forward[50];
4341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi->setText(ustr);
4342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) {
4343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        forward[count] = i;
4344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (count < expectedcount && expected[count] != i) {
4345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            test->errln("break forward test failed: expected %d but got %d",
4346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        expected[count], i);
4347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
4348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        count ++;
4350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (count != expectedcount) {
4352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        printStringBreaks(ustr, expected, expectedcount);
4353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        test->errln("break forward test failed: missed %d match",
4354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    expectedcount - count);
4355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
4356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // testing boundaries
4358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = 1; i < expectedcount; i ++) {
4359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int j = expected[i - 1];
4360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (!bi->isBoundary(j)) {
4361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            printStringBreaks(ustr, expected, expectedcount);
4362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            test->errln("isBoundary() failed.  Expected boundary at position %d", j);
4363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
4364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (j = expected[i - 1] + 1; j < expected[i]; j ++) {
4366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (bi->isBoundary(j)) {
4367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                printStringBreaks(ustr, expected, expectedcount);
4368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                test->errln("isBoundary() failed.  Not expecting boundary at position %d", j);
4369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return;
4370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) {
4375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        count --;
4376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (forward[count] != i) {
4377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            printStringBreaks(ustr, expected, expectedcount);
4378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            test->errln("happy break test previous() failed: expected %d but got %d",
4379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        forward[count], i);
4380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
4381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (count != 0) {
4384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        printStringBreaks(ustr, expected, expectedcount);
4385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        test->errln("break test previous() failed: missed a match");
4386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
4387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // testing preceding
4390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = 0; i < expectedcount - 1; i ++) {
4391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // int j = expected[i] + 1;
4392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int j = ustr.moveIndex32(expected[i], 1);
4393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (; j <= expected[i + 1]; j ++) {
4394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (bi->preceding(j) != expected[i]) {
4395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                printStringBreaks(ustr, expected, expectedcount);
4396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                test->errln("preceding(): Not expecting boundary at position %d", j);
4397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return;
4398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
4402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestWordBreaks(void)
4404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
4405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS
4406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Locale        locale("en");
4408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode    status = U_ZERO_ERROR;
4409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // BreakIterator  *bi = BreakIterator::createCharacterInstance(locale, status);
4410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *bi = BreakIterator::createWordInstance(locale, status);
4411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Replaced any C+J characters in a row with a random sequence of characters
4412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // of the same length to make our C+J segmentation not get in the way.
4413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char *strlist[] =
4414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
4415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U000e0032\\u0097\\u0f94\\uc2d8\\u05f4\\U000e0031\\u060d",
4416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U000e0037\\u2666\\u1202\\u003a\\U000e0031\\u064d\\u0bea\\u091c\\U000e0040\\u003b",
4417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0589\\u3e99\\U0001d7f3\\U000e0074\\u1810\\u200e\\U000e004b\\u0027\\U000e0061\\u003a",
4418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u398c\\U000104a5\\U0001d173\\u102d\\u002e\\uca3b\\u002e\\u002c\\u5622",
4419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\uac00\\u3588\\u009c\\u0953\\u194b",
4420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e",
4421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0602\\u2019\\ua191\\U000e0063\\u0a4c\\u003a\\ub4b5\\u003a\\u827f\\u002e",
4422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u2f1f\\u1634\\u05f8\\u0944\\u04f2\\u0cdf\\u1f9c\\u05f4\\u002e",
4423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044",
4424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u003b\\u024a\\u102e\\U000e0071\\u0600",
4425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u2027\\U000e0067\\u0a47\\u00b7",
4426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0",
4427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027",
4428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0589\\U000e006e\\u0a42\\U000104a5",
4429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0f66\\u2523\\u003a\\u0cae\\U000e0047\\u003a",
4430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7",
4431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0027\\u11af\\U000e0057\\u0602",
4432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U0001d7f2\\U000e007\\u0004\\u0589",
4433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b",
4434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U0001d7f2\\U000e007d\\u0004\\u0589",
4435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d",
4436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959",
4437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U000e0065\\u302c\\uc986\\u09ee\\U000e0068",
4438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7",
4439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0233\\U000e0020\\u0a69\\u0d6a",
4440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019",
4441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u18f4\\U000e0049\\u20e7\\u2027",
4442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\ub315\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe",
4443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\ua183\\u102d\\u0bec\\u003a",
4444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u17e8\\u06e7\\u002e\\u096d\\u003b",
4445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u003a\\u0e57\\u0fad\\u002e",
4446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de",
4447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a",
4448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U000e005d\\u2044\\u0731\\u0650\\u0061",
4449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u003a\\u0664\\u00b7\\u1fba",
4450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u003b\\u0027\\u00b7\\u47a3",
4451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u2027\\U000e0067\\u0a42\\u00b7\\u4edf\\uc26c\\u003a\\u4186\\u041b",
4452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0027\\u003a\\U0001d70f\\U0001d7df\\ubf4a\\U0001d7f5\\U0001d177\\u003a\\u0e51\\u1058\\U000e0058\\u00b7\\u0673",
4453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\uc30d\\u002e\\U000e002c\\u0c48\\u003a\\ub5a1\\u0661\\u002c",
4454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
4455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int loop;
4456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
4457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
4458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
4459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
4461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // printf("looping %d\n", loop);
4462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString ustr = CharsToUnicodeString(strlist[loop]);
4463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // RBBICharMonkey monkey;
4464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        RBBIWordMonkey monkey;
4465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int expected[50];
4467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int expectedcount = 0;
4468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        monkey.setText(ustr);
4470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int i;
4471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) {
4472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expected[expectedcount ++] = i;
4473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        testBreakBoundPreceding(this, ustr, bi, expected, expectedcount);
4476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
4478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
4479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
4480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestWordBoundary(void)
4482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
4483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // <data><>\u1d4a\u206e<?>\u0603\U0001d7ff<>\u2019<></data>
4484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Locale        locale("en");
4485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode    status = U_ZERO_ERROR;
4486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // BreakIterator  *bi = BreakIterator::createCharacterInstance(locale, status);
4487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *bi = BreakIterator::createWordInstance(locale, status);
4488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar         str[50];
4489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char *strlist[] =
4490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
4491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e",
4492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044",
4493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u003b\\u024a\\u102e\\U000e0071\\u0600",
4494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u2027\\U000e0067\\u0a47\\u00b7",
4495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0",
4496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027",
4497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0589\\U000e006e\\u0a42\\U000104a5",
4498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u4f66\\ub523\\u003a\\uacae\\U000e0047\\u003a",
4499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7",
4500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0027\\u11af\\U000e0057\\u0602",
4501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U0001d7f2\\U000e007\\u0004\\u0589",
4502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b",
4503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U0001d7f2\\U000e007d\\u0004\\u0589",
4504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d",
4505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0e01\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959",
4506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U000e0065\\u302c\\u09ee\\U000e0068",
4507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7",
4508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u0233\\U000e0020\\u0a69\\u0d6a",
4509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019",
4510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u58f4\\U000e0049\\u20e7\\u2027",
4511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe",
4512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\ua183\\u102d\\u0bec\\u003a",
4513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u17e8\\u06e7\\u002e\\u096d\\u003b",
4514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u003a\\u0e57\\u0fad\\u002e",
4515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de",
4516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a",
4517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\ua2a5\\u0038\\u2044\\u002e\\u0c67\\U000e003c\\u05f4\\u2027\\u05f4\\u2019",
4518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u003a\\u0664\\u00b7\\u1fba",
4519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    "\\u003b\\u0027\\u00b7\\u47a3",
4520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
4521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int loop;
4522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
4523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
4524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
4525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
4527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // printf("looping %d\n", loop);
4528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        u_unescape(strlist[loop], str, 20);
4529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString ustr(str);
4530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int forward[50];
4531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int count = 0;
4532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        bi->setText(ustr);
4534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int prev = 0;
4535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int i;
4536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) {
4537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            forward[count ++] = i;
4538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (i > prev) {
4539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                int j;
4540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                for (j = prev + 1; j < i; j ++) {
4541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if (bi->isBoundary(j)) {
4542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        printStringBreaks(ustr, forward, count);
4543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        errln("happy boundary test failed: expected %d not a boundary",
4544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                               j);
4545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return;
4546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
4547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
4548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (!bi->isBoundary(i)) {
4550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                printStringBreaks(ustr, forward, count);
4551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("happy boundary test failed: expected %d a boundary",
4552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       i);
4553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return;
4554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            prev = i;
4556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
4559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
4560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestLineBreaks(void)
4562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
4563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS
4564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Locale        locale("en");
4565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode    status = U_ZERO_ERROR;
4566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *bi = BreakIterator::createLineInstance(locale, status);
4567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const int32_t  STRSIZE = 50;
4568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar         str[STRSIZE];
4569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char *strlist[] =
4570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
4571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u300f\\ufdfc\\ub798\\u2011\\u2011\\u0020\\u0b43\\u002d\\ubeec\\ufffc",
4572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u24ba\\u2060\\u3405\\ub290\\u000d\\U000e0032\\ufe35\\u00a0\\u0361\\"
4573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             "U000112ed\\u0f0c\\u000a\\u308e\\ua875\\u0085\\u114d",
4574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\ufffc\\u3063\\u2e08\\u30e3\\u000d\\u002d\\u0ed8\\u002f\\U00011a57\\"
4575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             "u2014\\U000e0105\\u118c\\u000a\\u07f8",
4576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u0668\\u192b\\u002f\\u2034\\ufe39\\u00b4\\u0cc8\\u2571\\u200b\\u003f",
4577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\ufeff\\ufffc\\u3289\\u0085\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123",
4578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\ufe3c\\u201c\\u000d\\u2025\\u2007\\u201c\\u002d\\u20a0\\u002d\\u30a7\\u17a4",
4579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123",
4580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u002d\\uff1b\\u02c8\\u2029\\ufeff\\u0f22\\u2044\\ufe09\\u003a\\u096d\\u2009\\u000a\\u06f7\\u02cc\\u1019\\u2060",
4581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u1781\\u0b68\\u0f0c\\u3010\\u0085\\U00011f7a\\u0020\\u0dd6\\u200b\\U000e007a\\u000a\\u2060\\u2026\\u002f\\u2026\\u24dc\\u101e\\u2014\\u2007\\u30a5",
4582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u2770\\u0020\\U000e010f\\u0020\\u2060\\u000a\\u02cc\\u0bcc\\u060d\\u30e7\\u0f3b\\u002f",
4583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\ufeff\\u0028\\u003b\\U00012fec\\u2010\\u0020\\u0004\\u200b\\u0020\\u275c\\u002f\\u17b1",
4584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u20a9\\u2014\\u00a2\\u31f1\\u002f\\u0020\\u05b8\\u200b\\u0cc2\\u003b\\u060d\\u02c8\\ua4e8\\u002f\\u17d5",
4585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u002d\\u136f\\uff63\\u0084\\ua933\\u2028\\u002d\\u431b\\u200b\\u20b0",
4586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\uade3\\u11d6\\u000a\\U0001107d\\u203a\\u201d\\ub070\\u000d\\u2024\\ufffc",
4587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\uff5b\\u101c\\u1806\\u002f\\u2213\\uff5f",
4588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u2014\\u0a83\\ufdfc\\u003f\\u00a0\\u0020\\u000a\\u2991\\U0001d179\\u0020\\u201d\\U000125f6\\u0a67\\u20a7\\ufeff\\u043f",
4589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u169b\\U000e0130\\u002d\\u1041\\u0f3d\\u0abf\\u00b0\\u31fb\\u00a0\\u002d\\u02c8\\u003b",
4590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u2762\\u1680\\u002d\\u2028\\u0027\\u01dc\\ufe56\\u003a\\u000a\\uffe6\\u29fd\\u0020\\u30ee\\u007c\\U0001d178\\u0af1\\u0085",
4591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u3010\\u200b\\u2029\\ufeff\\ufe6a\\u275b\\U000e013b\\ufe37\\u24d4\\u002d\\u1806\\u256a\\u1806\\u247c\\u0085\\u17ac",
4592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u99ab\\u0027\\u003b\\u2026\\ueaf0\\u0020\\u0020\\u0313\\u0020\\u3099\\uff09\\u208e\\u2011\\u2007\\u2060\\u000a\\u0020\\u0020\\u300b\\u0bf9",
4593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u1806\\u060d\\u30f5\\u00b4\\u17e9\\u2544\\u2028\\u2024\\u2011\\u20a3\\u002d\\u09cc\\u1782\\u000d\\uff6f\\u0025",
4594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u002f\\uf22e\\u1944\\ufe3d\\u0020\\u206f\\u31b3\\u2014\\u002d\\u2025\\u0f0c\\u0085\\u2763",
4595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u002f\\u2563\\u202f\\u0085\\u17d5\\u200b\\u0020\\U000e0043\\u2014\\u058a\\u3d0a\\ufe57\\u2035\\u2028\\u2029",
4596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u20ae\\U0001d169\\u9293\\uff1f\\uff1f\\u0021\\u2012\\u2039\\u0085\\u02cc\\u00a2\\u0020\\U000e01ab\\u3085\\u0f3a\\u1806\\u0f0c\\u1945\\u000a\\U0001d7e7",
4597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\uffe6\\u00a0\\u200b\\u0085\\u2116\\u255b\\U0001d7f7\\u178c\\ufffc",
4598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u02cc\\ufe6a\\u00a0\\u0021\\u002d\\u7490\\uec2e\\u200b\\u000a",
4599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\uec2e\\u200b\\u000a\\u0020\\u2028\\u2014\\u8945",
4600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u7490\\uec2e\\u200b\\u000a\\u0020\\u2028\\u2014",
4601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u0020\\u2028\\u2014\\u8945\\u002c\\u005b",
4602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u000a\\ufe3c\\u201c\\u000d\\u2025\\u2007\\u201c\\u002d\\u20a0",
4603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u2473\\u0e9d\\u0020\\u0085\\u000a\\ufe3c\\u201c\\u000d\\u2025",
4604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\U0001d16e\\ufffc\\u2025\\u0021\\u002d",
4605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\ufffc\\u301b\\u0fa5\\U000e0103\\u2060\\u208e\\u17d5\\u034f\\u1009\\u003a\\u180e\\u2009\\u3111",
4606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u2014\\u0020\\u000a\\u17c5\\u24fc",
4607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\ufffc\\u0020\\u2116\\uff6c\\u200b\\u0ac3\\U0001028f",
4608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\uaeb0\\u0344\\u0085\\ufffc\\u073b\\u2010",
4609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\ufeff\\u0589\\u0085\\u0eb8\\u30fd\\u002f\\u003a\\u2014\\ufe43",
4610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u09cc\\u256a\\u276d\\u002d\\u3085\\u000d\\u0e05\\u2028\\u0fbb",
4611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u2034\\u00bb\\u0ae6\\u300c\\u0020\\u31f8\\ufffc",
4612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u2116\\u0ed2\\uff64\\u02cd\\u2001\\u2060",
4613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         "\\u809d\\u2e02\\u0f0a\\uc48f\\u2540\\u000d\\u0cef\\u003a\\u0e4d"
4614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         "\\U000e0172\\U000e005c\\u17cf\\U00010ca6\\ufeff\\uf621\\u06f3\\uffe5"
4615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         "\\u0ea2\\ufeff\\udcea\\u3085\\ua874\\u000a\\u0020\\u000b\\u200b",
4616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\ufe10\\u2060\\u1a5a\\u2060\\u17e4\\ufffc\\ubbe1\\ufe15\\u0020\\u00a0",
4617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         "\\u2060\\u2213\\u200b\\u2019\\uc2dc\\uff6a\\u1736\\u0085\\udb07",
4618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
4619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int loop;
4620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT_SUCCESS(status);
4621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
4622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
4623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
4625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // printf("looping %d\n", loop);
4626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t t = u_unescape(strlist[loop], str, STRSIZE);
4627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (t >= STRSIZE) {
4628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            TEST_ASSERT(FALSE);
4629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString ustr(str);
4634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        RBBILineMonkey monkey;
4635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(monkey.deferredStatus)) {
4636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        const int EXPECTEDSIZE = 50;
4640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int expected[EXPECTEDSIZE];
4641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int expectedcount = 0;
4642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        monkey.setText(ustr);
4644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int i;
4645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) {
4646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (expectedcount >= EXPECTEDSIZE) {
4647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(expectedcount < EXPECTEDSIZE);
4648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return;
4649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expected[expectedcount ++] = i;
4651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        testBreakBoundPreceding(this, ustr, bi, expected, expectedcount);
4654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
4656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
4657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
4658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestSentBreaks(void)
4660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
4661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS
4662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Locale        locale("en");
4663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode    status = U_ZERO_ERROR;
4664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *bi = BreakIterator::createSentenceInstance(locale, status);
4665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar         str[200];
4666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char *strlist[] =
4667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
4668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "Now\ris\nthe\r\ntime\n\rfor\r\r",
4669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "This\n",
4670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "Hello! how are you? I'am fine. Thankyou. How are you doing? This\n costs $20,00,000.",
4671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\"Sentence ending with a quote.\" Bye.",
4672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "  (This is it).  Testing the sentence iterator. \"This isn't it.\"",
4673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "Hi! This is a simple sample sentence. (This is it.) This is a simple sample sentence. \"This isn't it.\"",
4674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "Hi! This is a simple sample sentence. It does not have to make any sense as you can see. ",
4675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ",
4676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "Che la dritta via aveo smarrita. He said, that I said, that you said!! ",
4677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "Don't rock the boat.\\u2029Because I am the daddy, that is why. Not on my time (el timo.)!",
4678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\U0001040a\\u203a\\u1217\\u2b23\\u000d\\uff3b\\u03dd\\uff57\\u0a69\\u104a\\ufe56\\ufe52"
4679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             "\\u3016\\U000e002f\\U000e0077\\u0662\\u1680\\u2984\\U000e006a\\u002e\\ua6ab\\u104a"
4680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             "\\u002e\\u019b\\u2005\\u002e\\u0477\\u0438\\u0085\\u0441\\u002e\\u5f61\\u202f"
4681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             "\\U0001019f\\uff08\\u27e8\\u055c\\u0352",
4682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     "\\u1f3e\\u004d\\u000a\\ua3e4\\U000e0023\\uff63\\u0c52\\u276d\\U0001d5de\\U0001d171"
4683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             "\\u0e38\\u17e5\\U00012fe6\\u0fa9\\u267f\\u1da3\\u0046\\u03ed\\udc72\\u0030"
4684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             "\\U0001d688\\u0b6d\\u0085\\u0c67\\u1f94\\u0c6c\\u9cb2\\u202a\\u180e\\u000b"
4685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             "\\u002e\\U000e005e\\u035b\\u061f\\u02c1\\U000e0025\\u0357\\u0969\\u202b"
4686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             "\\U000130c5\\u0486\\U000e0123\\u2019\\u01bc\\u2006\\u11ad\\u180e\\u2e05"
4687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             "\\u10b7\\u013e\\u000a\\u002e\\U00013ea4"
4688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
4689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int loop;
4690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
4691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
4692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
4693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) {
4695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        u_unescape(strlist[loop], str, (int32_t)(sizeof(str) / sizeof(str[0])));
4696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString ustr(str);
4697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        RBBISentMonkey monkey;
4699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(monkey.deferredStatus)) {
4700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
4701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        const int EXPECTEDSIZE = 50;
4704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int expected[EXPECTEDSIZE];
4705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int expectedcount = 0;
4706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        monkey.setText(ustr);
4708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int i;
4709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) {
4710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (expectedcount >= EXPECTEDSIZE) {
4711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(expectedcount < EXPECTEDSIZE);
4712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return;
4713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expected[expectedcount ++] = i;
4715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        testBreakBoundPreceding(this, ustr, bi, expected, expectedcount);
4718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
4720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
4721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
4722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestMonkey(char *params) {
4724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS
4725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode     status    = U_ZERO_ERROR;
4727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t        loopCount = 500;
4728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t        seed      = 1;
4729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString  breakType = "all";
4730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Locale         locale("en");
4731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool          useUText  = FALSE;
4732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (quick == FALSE) {
4734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        loopCount = 10000;
4735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (params) {
4738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString p(params);
4739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        loopCount = getIntParam("loop", p, loopCount);
4740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        seed      = getIntParam("seed", p, seed);
4741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        RegexMatcher m(" *type *= *(char|word|line|sent|title) *", p, 0, status);
4743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (m.find()) {
4744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            breakType = m.group(1, status);
4745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            m.reset();
4746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p = m.replaceFirst("", status);
4747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        RegexMatcher u(" *utext", p, 0, status);
4750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (u.find()) {
4751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            useUText = TRUE;
4752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            u.reset();
4753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p = u.replaceFirst("", status);
4754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // m.reset(p);
4758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (RegexMatcher(UNICODE_STRING_SIMPLE("\\S"), p, 0, status).find()) {
4759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Each option is stripped out of the option string as it is processed.
4760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // All options have been checked.  The option string should have been completely emptied..
4761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            char buf[100];
4762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p.extract(buf, sizeof(buf), NULL, status);
4763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            buf[sizeof(buf)-1] = 0;
4764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Unrecognized or extra parameter:  %s\n", buf);
4765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
4766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (breakType == "char" || breakType == "all") {
4771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        RBBICharMonkey  m;
4772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        BreakIterator  *bi = BreakIterator::createCharacterInstance(locale, status);
4773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_SUCCESS(status)) {
4774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            RunMonkey(bi, m, "char", seed, loopCount, useUText);
4775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (breakType == "all" && useUText==FALSE) {
4776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Also run a quick test with UText when "all" is specified
4777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                RunMonkey(bi, m, "char", seed, loopCount, TRUE);
4778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else {
4781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errcheckln(status, "Creation of character break iterator failed %s", u_errorName(status));
4782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete bi;
4784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (breakType == "word" || breakType == "all") {
4787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln("Word Break Monkey Test");
4788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        RBBIWordMonkey  m;
4789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        BreakIterator  *bi = BreakIterator::createWordInstance(locale, status);
4790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_SUCCESS(status)) {
4791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            RunMonkey(bi, m, "word", seed, loopCount, useUText);
4792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else {
4794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errcheckln(status, "Creation of word break iterator failed %s", u_errorName(status));
4795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete bi;
4797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (breakType == "line" || breakType == "all") {
4800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln("Line Break Monkey Test");
4801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        RBBILineMonkey  m;
4802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        BreakIterator  *bi = BreakIterator::createLineInstance(locale, status);
4803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (loopCount >= 10) {
4804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            loopCount = loopCount / 5;   // Line break runs slower than the others.
4805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_SUCCESS(status)) {
4807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            RunMonkey(bi, m, "line", seed, loopCount, useUText);
4808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else {
4810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status));
4811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete bi;
4813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (breakType == "sent" || breakType == "all"  ) {
4816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln("Sentence Break Monkey Test");
4817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        RBBISentMonkey  m;
4818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        BreakIterator  *bi = BreakIterator::createSentenceInstance(locale, status);
4819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (loopCount >= 10) {
4820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            loopCount = loopCount / 10;   // Sentence runs slower than the other break types
4821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_SUCCESS(status)) {
4823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            RunMonkey(bi, m, "sentence", seed, loopCount, useUText);
4824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else {
4826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status));
4827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete bi;
4829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
4832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
4833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
//
//  Run a RBBI monkey test.  Common routine, for all break iterator types.
//    Parameters:
//       bi      - the break iterator to use
//       mk      - MonkeyKind, abstraction for obtaining expected results
//       name    - Name of test (char, word, etc.) for use in error messages
//       seed    - Seed for starting random number generator (parameter from user)
//       numIterations
//               - Number of random test strings to generate and check.
//                 A value of -1 keeps the test looping indefinitely.
//       useUText
//               - If TRUE, the test text is given to the break iterator through a
//                 UText; otherwise it is set directly from the UnicodeString.
//
4843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, uint32_t  seed,
4844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         int32_t numIterations, UBool useUText) {
4845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS
4847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const int32_t    TESTSTRINGLEN = 500;
4849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString    testText;
4850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t          numCharClasses;
4851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UVector          *chClasses;
4852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int              expected[TESTSTRINGLEN*2 + 1];
4853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int              expectedCount = 0;
4854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char             expectedBreaks[TESTSTRINGLEN*2 + 1];
4855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char             forwardBreaks[TESTSTRINGLEN*2 + 1];
4856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char             reverseBreaks[TESTSTRINGLEN*2+1];
4857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char             isBoundaryBreaks[TESTSTRINGLEN*2+1];
4858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char             followingBreaks[TESTSTRINGLEN*2+1];
4859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char             precedingBreaks[TESTSTRINGLEN*2+1];
4860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int              i;
4861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int              loopCount = 0;
4862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    m_seed = seed;
4864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    numCharClasses = mk.charClasses()->size();
4866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    chClasses      = mk.charClasses();
4867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Check for errors that occured during the construction of the MonkeyKind object.
4869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Can't report them where they occured because errln() is a method coming from intlTest,
4870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  and is not visible outside of RBBITest :-(
4871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(mk.deferredStatus)) {
4872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("status of \"%s\" in creation of RBBIMonkeyKind.", u_errorName(mk.deferredStatus));
4873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
4874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Verify that the character classes all have at least one member.
4877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i=0; i<numCharClasses; i++) {
4878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet *s = (UnicodeSet *)chClasses->elementAt(i);
4879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (s == NULL || s->size() == 0) {
4880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("Character Class #%d is null or of zero size.", i);
4881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
4882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
4884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (loopCount < numIterations || numIterations == -1) {
4886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (numIterations == -1 && loopCount % 10 == 0) {
4887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // If test is running in an infinite loop, display a periodic tic so
4888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   we can tell that it is making progress.
4889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fprintf(stderr, ".");
4890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Save current random number seed, so that we can recreate the random numbers
4892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //   for this loop iteration in event of an error.
4893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        seed = m_seed;
4894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Populate a test string with data.
4896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        testText.truncate(0);
4897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i=0; i<TESTSTRINGLEN; i++) {
4898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t  aClassNum = m_rand() % numCharClasses;
4899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UnicodeSet *classSet = (UnicodeSet *)chClasses->elementAt(aClassNum);
4900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t   charIdx = m_rand() % classSet->size();
4901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UChar32   c = classSet->charAt(charIdx);
4902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (c < 0) {   // TODO:  deal with sets containing strings.
4903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("c < 0");
4904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
4905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testText.append(c);
4907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Calculate the expected results for this test string.
4910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        mk.setText(testText);
4911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        memset(expectedBreaks, 0, sizeof(expectedBreaks));
4912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectedBreaks[0] = 1;
4913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t breakPos = 0;
4914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectedCount = 0;
4915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (;;) {
4916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            breakPos = mk.next(breakPos);
4917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (breakPos == -1) {
4918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
4919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (breakPos > testText.length()) {
4921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("breakPos > testText.length()");
4922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expectedBreaks[breakPos] = 1;
4924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U_ASSERT(expectedCount<testText.length());
4925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expected[expectedCount ++] = breakPos;
4926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Find the break positions using forward iteration
4929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        memset(forwardBreaks, 0, sizeof(forwardBreaks));
4930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (useUText) {
4931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UErrorCode status = U_ZERO_ERROR;
4932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UText *testUText = utext_openReplaceable(NULL, &testText, &status);
4933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // testUText = utext_openUnicodeString(testUText, &testText, &status);
4934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            bi->setText(testUText, status);
4935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            TEST_ASSERT_SUCCESS(status);
4936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utext_close(testUText);   // The break iterator does a shallow clone of the UText
4937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                      //  This UText can be closed immediately, so long as the
4938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                      //  testText string continues to exist.
4939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
4940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            bi->setText(testText);
4941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i=bi->first(); i != BreakIterator::DONE; i=bi->next()) {
4944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (i < 0 || i > testText.length()) {
4945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("%s break monkey test: Out of range value returned by breakIterator::next()", name);
4946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
4947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            forwardBreaks[i] = 1;
4949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Find the break positions using reverse iteration
4952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        memset(reverseBreaks, 0, sizeof(reverseBreaks));
4953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i=bi->last(); i != BreakIterator::DONE; i=bi->previous()) {
4954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (i < 0 || i > testText.length()) {
4955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("%s break monkey test: Out of range value returned by breakIterator::next()", name);
4956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
4957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            reverseBreaks[i] = 1;
4959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Find the break positions using isBoundary() tests.
4962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        memset(isBoundaryBreaks, 0, sizeof(isBoundaryBreaks));
4963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        U_ASSERT((int32_t)sizeof(isBoundaryBreaks) > testText.length());
4964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i=0; i<=testText.length(); i++) {
4965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            isBoundaryBreaks[i] = bi->isBoundary(i);
4966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Find the break positions using the following() function.
4970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // printf(".");
4971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        memset(followingBreaks, 0, sizeof(followingBreaks));
4972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t   lastBreakPos = 0;
4973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        followingBreaks[0] = 1;
4974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i=0; i<testText.length(); i++) {
4975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            breakPos = bi->following(i);
4976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (breakPos <= i ||
4977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                breakPos < lastBreakPos ||
4978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                breakPos > testText.length() ||
4979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                (breakPos > lastBreakPos && lastBreakPos > i)) {
4980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("%s break monkey test: "
4981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    "Out of range value returned by BreakIterator::following().\n"
4982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        "Random seed=%d  index=%d; following returned %d;  lastbreak=%d",
4983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         name, seed, i, breakPos, lastBreakPos);
4984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
4985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
4986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            followingBreaks[breakPos] = 1;
4987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lastBreakPos = breakPos;
4988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
4989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
4990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Find the break positions using the preceding() function.
4991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        memset(precedingBreaks, 0, sizeof(precedingBreaks));
4992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        lastBreakPos = testText.length();
4993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        precedingBreaks[testText.length()] = 1;
4994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i=testText.length(); i>0; i--) {
4995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            breakPos = bi->preceding(i);
4996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (breakPos >= i ||
4997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                breakPos > lastBreakPos ||
4998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                (breakPos < 0 && testText.getChar32Start(i)>0) ||
4999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                (breakPos < lastBreakPos && lastBreakPos < testText.getChar32Start(i)) ) {
5000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("%s break monkey test: "
5001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    "Out of range value returned by BreakIterator::preceding().\n"
5002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    "index=%d;  prev returned %d; lastBreak=%d" ,
5003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    name,  i, breakPos, lastBreakPos);
5004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (breakPos >= 0 && breakPos < (int32_t)sizeof(precedingBreaks)) {
5005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    precedingBreaks[i] = 2;   // Forces an error.
5006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
5007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
5008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (breakPos >= 0) {
5009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    precedingBreaks[breakPos] = 1;
5010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
5011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                lastBreakPos = breakPos;
5012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
5013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
5014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Compare the expected and actual results.
5016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i=0; i<=testText.length(); i++) {
5017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char *errorType = NULL;
5018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if  (forwardBreaks[i] != expectedBreaks[i]) {
5019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errorType = "next()";
5020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else if (reverseBreaks[i] != forwardBreaks[i]) {
5021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errorType = "previous()";
5022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
5023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errorType = "isBoundary()";
5024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else if (followingBreaks[i] != expectedBreaks[i]) {
5025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errorType = "following()";
5026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else if (precedingBreaks[i] != expectedBreaks[i]) {
5027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errorType = "preceding()";
5028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
5029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (errorType != NULL) {
5032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Format a range of the test text that includes the failure as
5033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //  a data item that can be included in the rbbi test data file.
5034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Start of the range is the last point where expected and actual results
5036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //   both agreed that there was a break position.
5037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                int startContext = i;
5038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                int32_t count = 0;
5039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                for (;;) {
5040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if (startContext==0) { break; }
5041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    startContext --;
5042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if (expectedBreaks[startContext] != 0) {
5043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if (count == 2) break;
5044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        count ++;
5045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
5046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
5047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // End of range is two expected breaks past the start position.
5049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                int endContext = i + 1;
5050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                int ci;
5051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                for (ci=0; ci<2; ci++) {  // Number of items to include in error text.
5052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    for (;;) {
5053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if (endContext >= testText.length()) {break;}
5054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if (expectedBreaks[endContext-1] != 0) {
5055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if (count == 0) break;
5056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            count --;
5057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
5058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        endContext ++;
5059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
5060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
5061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Format looks like   "<data>\\\uabcd\uabcd\\\U0001abcd...</data>"
5063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UnicodeString errorText = "<data>";
5064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                /***if (strcmp(errorType, "next()") == 0) {
5065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    startContext = 0;
5066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    endContext = testText.length();
5067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    printStringBreaks(testText, expected, expectedCount);
5069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }***/
5070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                for (ci=startContext; ci<endContext;) {
5072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UnicodeString hexChars("0123456789abcdef");
5073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UChar32  c;
5074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    int      bn;
5075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    c = testText.char32At(ci);
5076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if (ci == i) {
5077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        // This is the location of the error.
5078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        errorText.append("<?>");
5079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    } else if (expectedBreaks[ci] != 0) {
5080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        // This a non-error expected break position.
5081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        errorText.append("\\");
5082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
5083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if (c < 0x10000) {
5084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        errorText.append("\\u");
5085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        for (bn=12; bn>=0; bn-=4) {
5086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            errorText.append(hexChars.charAt((c>>bn)&0xf));
5087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
5088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    } else {
5089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        errorText.append("\\U");
5090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        for (bn=28; bn>=0; bn-=4) {
5091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            errorText.append(hexChars.charAt((c>>bn)&0xf));
5092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
5093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
5094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    ci = testText.moveIndex32(ci, 1);
5095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
5096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errorText.append("\\");
5097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errorText.append("</data>\n");
5098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Output the error
5100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                char  charErrorTxt[500];
5101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UErrorCode status = U_ZERO_ERROR;
5102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errorText.extract(charErrorTxt, sizeof(charErrorTxt), NULL, status);
5103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                charErrorTxt[sizeof(charErrorTxt)-1] = 0;
5104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("%s break monkey test error.  %s. Operation = %s; Random seed = %d;  buf Idx = %d\n%s",
5105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    name, (expectedBreaks[i]? "break expected but not found" : "break found but not expected"),
5106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    errorType, seed, i, charErrorTxt);
5107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
5108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
5109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
5110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        loopCount++;
5112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
5113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
5114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
5115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  Bug 5532.  UTF-8 based UText fails in dictionary code.
5118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//             This test checks the initial patch,
5119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//             which is to just keep it from crashing.  Correct word boundaries
5120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//             await a proper fix to the dictionary code.
5121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
5122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestBug5532(void)  {
5123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)   // Text includes a mixture of Thai and Latin.
5124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)   const unsigned char utf8Data[] = {
5125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           0xE0u, 0xB8u, 0x82u, 0xE0u, 0xB8u, 0xB2u, 0xE0u, 0xB8u, 0xA2u, 0xE0u,
5126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           0xB9u, 0x80u, 0xE0u, 0xB8u, 0x84u, 0xE0u, 0xB8u, 0xA3u, 0xE0u, 0xB8u,
5127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           0xB7u, 0xE0u, 0xB9u, 0x88u, 0xE0u, 0xB8u, 0xADu, 0xE0u, 0xB8u, 0x87u,
5128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           0xE0u, 0xB9u, 0x80u, 0xE0u, 0xB8u, 0xA5u, 0xE0u, 0xB9u, 0x88u, 0xE0u,
5129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           0xB8u, 0x99u, 0xE0u, 0xB8u, 0x8Bu, 0xE0u, 0xB8u, 0xB5u, 0xE0u, 0xB8u,
5130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           0x94u, 0xE0u, 0xB8u, 0xB5u, 0x20u, 0x73u, 0x69u, 0x6Du, 0x20u, 0x61u,
5131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           0x75u, 0x64u, 0x69u, 0x6Fu, 0x2Fu, 0x20u, 0x4Du, 0x4Fu, 0x4Fu, 0x4Eu,
5132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           0x20u, 0x65u, 0x63u, 0x6Cu, 0x69u, 0x70u, 0x73u, 0x65u, 0x20u, 0xE0u,
5133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           0xB8u, 0xA3u, 0xE0u, 0xB8u, 0xB2u, 0xE0u, 0xB8u, 0x84u, 0xE0u, 0xB8u,
5134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           0xB2u, 0x20u, 0x34u, 0x37u, 0x30u, 0x30u, 0x20u, 0xE0u, 0xB8u, 0xA2u,
5135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           0xE0u, 0xB8u, 0xB9u, 0xE0u, 0xB9u, 0x82u, 0xE0u, 0xB8u, 0xA3u, 0x00};
5136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
5138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UText utext=UTEXT_INITIALIZER;
5139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_openUTF8(&utext, (const char *)utf8Data, -1, &status);
5140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT_SUCCESS(status);
5141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *bi = BreakIterator::createWordInstance(Locale("th"), status);
5143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT_SUCCESS(status);
5144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_SUCCESS(status)) {
5145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        bi->setText(&utext, status);
5146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
5147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t breakCount = 0;
5149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t previousBreak = -1;
5150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (bi->first(); bi->next() != BreakIterator::DONE; breakCount++) {
5151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // For now, just make sure that the break iterator doesn't hang.
5152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            TEST_ASSERT(previousBreak < bi->current());
5153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            previousBreak = bi->current();
5154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
5155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(breakCount > 0);
5156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
5157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete bi;
5158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_close(&utext);
5159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
5160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
5163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  TestDebug    -  A place-holder test for debugging purposes.
5164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                  For putting in fragments of other tests that can be invoked
5165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                  for tracing  without a lot of unwanted extra stuff happening.
5166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
5167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestDebug(void) {
5168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0
5169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode   status = U_ZERO_ERROR;
5170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int pos = 0;
5171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int ruleStatus = 0;
5172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator* bi =
5174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)       // (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
5175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)       // (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::Locale("th"), status);
5176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)       (RuleBasedBreakIterator *)BreakIterator::createSentenceInstance(Locale::getDefault(), status);
5177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString s("\\u2008\\u002e\\udc6a\\u37cd\\u71d0\\u2048\\U000e006a\\u002e\\u0046\\ufd3f\\u000a\\u002e");
5178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // UnicodeString s("Aaa.  Bcd");
5179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    s = s.unescape();
5180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bi->setText(s);
5181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool r = bi->isBoundary(8);
5182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("%s", r?"true":"false");
5183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return;
5184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pos = bi->last();
5185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    do {
5186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // ruleStatus = bi->getRuleStatus();
5187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        printf("%d\t%d\n", pos, ruleStatus);
5188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        pos = bi->previous();
5189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } while (pos != BreakIterator::DONE);
5190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
5191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
5192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
5193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
5194