1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/******************************************************************** 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * COPYRIGHT: 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (c) 1999-2010, International Business Machines Corporation and 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * others. All Rights Reserved. 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ********************************************************************/ 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/************************************************************************ 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Date Name Description 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 12/15/99 Madhu Creation. 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 01/12/2000 Madhu Updated for changed API and added new tests 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)************************************************************************/ 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypeinfo.h" // for 'typeid' to work 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_BREAK_ITERATION 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/brkiter.h" 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/rbbi.h" 
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchar.h" 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utf16.h" 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucnv.h" 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/schriter.h" 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uniset.h" 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/regex.h" // TODO: make conditional on regexp being built. 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ustring.h" 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utext.h" 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "intltest.h" 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "rbbitst.h" 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <string.h> 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvector.h" 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvectr32.h" 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "triedict.h" 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <string.h> 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <stdio.h> 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <stdlib.h> 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/numfmt.h" 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uscript.h" 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define TEST_ASSERT(x) {if (!(x)) { \ 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Failure in file %s, line %d", __FILE__, __LINE__);}} 
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \ 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(errcode));}} 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------- 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// runIndexedTest 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------- 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* params ) 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (exec) logln("TestSuite RuleBasedBreakIterator: "); 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch (index) { 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_FILE_IO 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 0: name = "TestBug4153072"; 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestBug4153072(); break; 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 0: name = "skip"; 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 1: name = "TestJapaneseLineBreak"; 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestJapaneseLineBreak(); break; 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 2: name = "TestStatusReturn"; 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestStatusReturn(); break; 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_FILE_IO 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 3: name = "TestUnicodeFiles"; 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestUnicodeFiles(); break; 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 4: name = "TestEmptyString"; 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestEmptyString(); break; 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 3: case 4: name = "skip"; 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 5: name = "TestGetAvailableLocales"; 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestGetAvailableLocales(); break; 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 6: name = "TestGetDisplayName"; 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestGetDisplayName(); break; 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_FILE_IO 
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 7: name = "TestEndBehaviour"; 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestEndBehaviour(); break; 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 8: name = "TestMixedThaiLineBreak"; 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestMixedThaiLineBreak(); break; 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 9: name = "TestThaiLineBreak"; 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestThaiLineBreak(); break; 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 10: name = "TestMaiyamok"; 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestMaiyamok(); break; 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 11: name = "TestWordBreaks"; 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestWordBreaks(); break; 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 12: name = "TestWordBoundary"; 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestWordBoundary(); break; 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 13: name = "TestLineBreaks"; 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestLineBreaks(); break; 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 14: name = "TestSentBreaks"; 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestSentBreaks(); break; 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 15: name = "TestExtended"; 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestExtended(); break; 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 7: case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 
15: name = "skip"; 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 16: 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) { 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) name = "TestMonkey"; 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TestMonkey(params); 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #else 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) name = "skip"; 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #endif 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_FILE_IO 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 17: name = "TestBug3818"; 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestBug3818(); break; 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 18: name = "TestJapaneseWordBreak"; 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestJapaneseWordBreak(); break; 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 17: case 18: name = "skip"; 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 19: name = "TestDebug"; 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestDebug(); break; 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 20: name = "TestTrieDict"; 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestTrieDict(); break; 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_FILE_IO 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 21: name = "TestBug5775"; 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (exec) TestBug5775(); break; 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 22: name = "TestThaiBreaks"; 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (exec) TestThaiBreaks(); break; 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 23: name = "TestTailoredBreaks"; 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (exec) TestTailoredBreaks(); break; 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 24: name = "TestTrieDictWithValue"; 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(exec) TestTrieDictWithValue(); break; 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#else 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 21: case 22: case 23: case 24: name = "skip"; 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 25: name = "TestDictRules"; 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (exec) TestDictRules(); break; 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 25: name = "TestBug5532"; 
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (exec) TestBug5532(); break; 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: name = ""; break; //needed to end loop 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------- 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// class BITestData Holds a set of Break iterator test data and results 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Includes 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// - the string data to be broken 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// - a vector of the expected break positions. 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// - a vector of source line numbers for the data, 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// (to help see where errors occured.) 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// - The expected break tag values. 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// - Vectors of actual break positions and tag values. 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// - Functions for comparing actual with expected and 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// reporting errors. 
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------- 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class BITestData { 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString fDataToBreak; 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector fExpectedBreakPositions; 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector fExpectedTags; 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector fLineNum; 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector fActualBreakPositions; // Test Results. 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector fActualTags; 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BITestData(UErrorCode &status); 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void addDataChunk(const char *data, int32_t tag, int32_t lineNum, UErrorCode status); 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void checkResults(const char *heading, RBBITest *test); 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void err(const char *heading, RBBITest *test, int32_t expectedIdx, int32_t actualIdx); 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void clearResults(); 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Constructor. 
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)BITestData::BITestData(UErrorCode &status) 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles): fExpectedBreakPositions(status), fExpectedTags(status), fLineNum(status), fActualBreakPositions(status), 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fActualTags(status) 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// addDataChunk. Add a section (non-breaking) piece if data to the test data. 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The macro form collects the line number, which is helpful 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// when tracking down failures. 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// A null data item is inserted at the start of each test's data 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// to put the starting zero into the data list. The position saved for 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// each non-null item is its ending position. 
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define ADD_DATACHUNK(td, data, tag, status) td.addDataChunk(data, tag, __LINE__, status); 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void BITestData::addDataChunk(const char *data, int32_t tag, int32_t lineNum, UErrorCode status) { 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) {return;} 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (data != NULL) { 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDataToBreak.append(CharsToUnicodeString(data)); 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fExpectedBreakPositions.addElement(fDataToBreak.length(), status); 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fExpectedTags.addElement(tag, status); 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLineNum.addElement(lineNum, status); 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// checkResults. Compare the actual and expected break positions, report any differences. 
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void BITestData::checkResults(const char *heading, RBBITest *test) { 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t expectedIndex = 0; 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t actualIndex = 0; 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If we've run through both the expected and actual results vectors, we're done. 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // break out of the loop. 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expectedIndex >= fExpectedBreakPositions.size() && 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) actualIndex >= fActualBreakPositions.size()) { 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expectedIndex >= fExpectedBreakPositions.size()) { 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) err(heading, test, expectedIndex-1, actualIndex); 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) actualIndex++; 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (actualIndex >= fActualBreakPositions.size()) { 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) err(heading, test, expectedIndex, 
actualIndex-1); 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectedIndex++; 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fActualBreakPositions.elementAti(actualIndex) != fExpectedBreakPositions.elementAti(expectedIndex)) { 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) err(heading, test, expectedIndex, actualIndex); 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Try to resync the positions of the indices, to avoid a rash of spurious erros. 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fActualBreakPositions.elementAti(actualIndex) < fExpectedBreakPositions.elementAti(expectedIndex)) { 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) actualIndex++; 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectedIndex++; 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fActualTags.elementAti(actualIndex) != fExpectedTags.elementAti(expectedIndex)) { 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) test->errln("%s, tag mismatch. 
Test Line = %d, expected tag=%d, got %d", 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) heading, fLineNum.elementAt(expectedIndex), 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fExpectedTags.elementAti(expectedIndex), fActualTags.elementAti(actualIndex)); 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) actualIndex++; 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectedIndex++; 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// err - An error was found. Report it, along with information about where the 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// incorrectly broken test data appeared in the source file. 
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void BITestData::err(const char *heading, RBBITest *test, int32_t expectedIdx, int32_t actualIdx) 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t expected = fExpectedBreakPositions.elementAti(expectedIdx); 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t actual = fActualBreakPositions.elementAti(actualIdx); 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t o = 0; 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t line = fLineNum.elementAti(expectedIdx); 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expectedIdx > 0) { 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The line numbers are off by one because a premature break occurs somewhere 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // within the previous item, rather than at the start of the current (expected) item. 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We want to report the offset of the unexpected break from the start of 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // this previous item. 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) o = actual - fExpectedBreakPositions.elementAti(expectedIdx-1); 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (actual < expected) { 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) test->errln("%s unexpected break at offset %d in test item from line %d. 
actual break: %d expected break: %d", heading, o, line, actual, expected); 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) test->errln("%s Failed to find break at end of item from line %d. actual break: %d expected break: %d", heading, line, actual, expected); 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void BITestData::clearResults() { 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fActualBreakPositions.removeAllElements(); 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fActualTags.removeAllElements(); 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------------- 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Cannned Test Characters 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------------- 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const UChar cannedTestArray[] = { 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x0001, 0x0002, 0x0003, 0x0004, 0x0020, 0x0021, '\\', 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028, 0x0029, 0x002b, 
0x002d, 0x0030, 0x0031, 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x0032, 0x0033, 0x0034, 0x003c, 0x003d, 0x003e, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x005b, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x007b, 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x007d, 0x007c, 0x002c, 0x00a0, 0x00a2, 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00ab, 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b2, 0x00b3, 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x00b4, 0x00b9, 0x00bb, 0x00bc, 0x00bd, 0x02b0, 0x02b1, 0x02b2, 0x02b3, 0x02b4, 0x0300, 0x0301, 0x0302, 0x0303, 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x0304, 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x0903, 0x093e, 0x093f, 0x0940, 0x0949, 0x0f3a, 0x0f3b, 0x2000, 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x2001, 0x2002, 0x200c, 0x200d, 0x200e, 0x200f, 0x2010, 0x2011, 0x2012, 0x2028, 0x2029, 0x202a, 0x203e, 0x203f, 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x2040, 0x20dd, 0x20de, 0x20df, 0x20e0, 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x0000 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UnicodeString* cannedTestChars = 0; 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define halfNA "\\u0928\\u094d\\u200d" 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define halfSA "\\u0938\\u094d\\u200d" 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define halfCHA "\\u091a\\u094d\\u200d" 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define halfKA "\\u0915\\u094d\\u200d" 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles)#define deadTA "\\u0924\\u094d" 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------------- 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// RBBITest constructor and destructor 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------------- 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBITest::RBBITest() { 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString temp(cannedTestArray); 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cannedTestChars = new UnicodeString(); 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *cannedTestChars += (UChar)0x0000; 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *cannedTestChars += temp; 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBITest::~RBBITest() { 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete cannedTestChars; 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const int T_NUMBER = 100; 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const int T_LETTER = 200; 
static const int T_H_OR_K = 300;
static const int T_IDEO   = 400;


//--------------------------------------------------------------------
// Devanagari building blocks for the BreakIterator tests.
//
// Each macro is an escaped string literal (the backslash is doubled, so
// the text still contains the \uXXXX escape sequences).  A "dead" form
// is a consonant followed by the virama, U+094D.
//--------------------------------------------------------------------

#define deadRA   "\\u0930\\u094d"   /* dead-form RA   = Devanagari RA  + virama */
#define deadPHA  "\\u092b\\u094d"   /* dead-form PHA  = Devanagari PHA + virama */
#define deadTTHA "\\u0920\\u094d"   /* dead-form TTHA = U+0920 + virama */
#define deadPA   "\\u092a\\u094d"   /* dead-form PA   = U+092A + virama */
#define deadSA   "\\u0938\\u094d"   /* dead-form SA   = U+0938 + virama */
#define visarga  "\\u0903"          /* Devanagari visarga; looks like an English colon */


367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------------- 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Test for status {tag} return value from break rules. 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TODO: a more thorough test. 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------------- 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestStatusReturn() { 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString rulesString1("$Letters = [:L:];\n" 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "$Numbers = [:N:];\n" 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "$Letters+{1};\n" 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "$Numbers+{2};\n" 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "Help\\ {4}/me\\!;\n" 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "[^$Letters $Numbers];\n" 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "!.*;\n", -1, US_INV); 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString testString1 = "abc123..abc Help me Help me!"; 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 01234567890123456789012345678 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t bounds1[] = {0, 3, 6, 7, 8, 11, 12, 16, 17, 19, 20, 25, 27, 28, -1}; 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t brkStatus[] = {0, 1, 2, 0, 0, 1, 0, 1, 0, 1, 0, 
4, 1, 0, -1}; 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status=U_ZERO_ERROR; 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError parseError; 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(status)) { 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataerrln("FAIL : in construction - %s", u_errorName(status)); 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t pos; 396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i = 0; 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi->setText(testString1); 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (pos=bi->first(); pos!= BreakIterator::DONE; pos=bi->next()) { 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pos != bounds1[i]) { 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("FAIL: expected break at %d, got %d\n", bounds1[i], pos); 401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int tag = bi->getRuleStatus(); 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (tag != brkStatus[i]) { 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("FAIL: break at %d, expected tag %d, got tag %d\n", pos, brkStatus[i], tag); 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i++; 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void printStringBreaks(UnicodeString ustr, int expected[], 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expectedcount) 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char name[100]; 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("code alpha extend alphanum type word sent line name\n"); 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int j; 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (j = 0; j < ustr.length(); j ++) { 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expectedcount > 0) { 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int k; 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (k = 0; k < expectedcount; k ++) { 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (j == expected[k]) { 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("------------------------------------------------ %d\n", 429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j); 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c = ustr.char32At(j); 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c > 0xffff) { 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j ++; 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status); 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c, 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_isUAlphabetic(c), 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND), 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_isalnum(c), 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_getPropertyValueName(UCHAR_GENERAL_CATEGORY, 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_charType(c), 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_SHORT_PROPERTY_NAME), 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_getPropertyValueName(UCHAR_WORD_BREAK, 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_getIntPropertyValue(c, 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCHAR_WORD_BREAK), 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_SHORT_PROPERTY_NAME), 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_getPropertyValueName(UCHAR_SENTENCE_BREAK, 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_getIntPropertyValue(c, 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCHAR_SENTENCE_BREAK), 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_SHORT_PROPERTY_NAME), 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
u_getPropertyValueName(UCHAR_LINE_BREAK, 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_getIntPropertyValue(c, 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCHAR_LINE_BREAK), 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_SHORT_PROPERTY_NAME), 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) name); 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestThaiLineBreak() { 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BITestData thaiLineSelection(status); 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // \u0e2f-- the Thai paiyannoi character-- isn't a letter. It's a symbol that 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // represents elided letters at the end of a long word. It should be bound to 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the end of the word and not treated as an independent punctuation mark. 
468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, NULL, 0, status); // Break at start of data 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e2a\\u0e16\\u0e32\\u0e19\\u0e35\\u0e2f", 0, status); 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e08\\u0e30", 0, status); 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e14\\u0e21", 0, status); 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e08\\u0e49\\u0e32", 0, status); 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// ADD_DATACHUNK(thaiLineSelection, "\\u0e2b\\u0e19\\u0e49\\u0e32", 0, status); 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// ADD_DATACHUNK(thaiLineSelection, "\\u0e17\\u0e35\\u0e48", 0, status); 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48", 0, status); 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the commented-out lines (I think) are the preferred result; this line is what our current dictionary is giving us 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e2d\\u0e2d\\u0e01", 0, status); 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e21\\u0e32", 0, status); 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e23\\u0e48\\u0e07", 0, status); 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, 
"\\u0e23\\u0e30\\u0e1a\\u0e32\\u0e22", 0, status); 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e2d\\u0e22\\u0e48\\u0e32\\u0e07", 0, status); 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e15\\u0e47\\u0e21", 0, status); 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the one time where the paiyannoi occurs somewhere other than at the end 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // of a word is in the Thai abbrevation for "etc.", which both begins and 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // ends with a paiyannoi 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e2f\\u0e25\\u0e2f", 0, status); 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e17\\u0e35\\u0e48", 0, status); 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e19\\u0e31\\u0e49\\u0e19", 0, status); 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createLineInstance( 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale("th"), status); 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Failed to create the BreakIterator for Thai locale in TestThaiLineBreak. 
- %s", u_errorName(status)); 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) generalIteratorTest(*e, thaiLineSelection); 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete e; 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestMixedThaiLineBreak() 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BITestData thaiLineSelection(status); 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, NULL, 0, status); // Break at start of data 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // @suwit -- Test Arabic numerals, Thai numerals, Punctuation and English characters 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // start 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E1B\\u0E35", 0, status); 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E1E\\u0E38\\u0E17\\u0E18\\u0E28\\u0E31\\u0E01\\u0E23\\u0E32\\u0E0A ", 0, status); 
520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "2545 ", 0, status); 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E40\\u0E1B\\u0E47\\u0E19", 0, status); 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E1B\\u0E35", 0, status); 523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E09\\u0E25\\u0E2D\\u0E07", 0, status); 524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E04\\u0E23\\u0E1A", 0, status); 525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E23\\u0E2D\\u0E1A ", 0, status); 526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\"\\u0E52\\u0E52\\u0E50 ", 0, status); 527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E1b\\u0E35\" ", 0, status); 528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E02\\u0E2d\\u0E07", 0, status); 529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E01\\u0E23\\u0E38\\u0E07", 0, status); 530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E23\\u0E31\\u0E15\\u0E19\\u0E42\\u0E01\\u0E2A\\u0E34\\u0E19\\u0E17\\u0E23\\u0E4C ", 0, status); 531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "(\\u0E01\\u0E23\\u0E38\\u0E07\\u0E40\\u0E17\\u0E1e\\u0E2F", 0, status); 532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0E2B\\u0E23\\u0E37\\u0E2D ", 0, status); 533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "Bangkok)", 0, status); 
534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // @suwit - end of changes 536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale("th"), status); 539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) 540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Failed to create the BreakIterator for Thai locale in TestMixedThaiLineBreak. - %s", u_errorName(status)); 542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) generalIteratorTest(*e, thaiLineSelection); 547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete e; 548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestMaiyamok() 552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BITestData thaiLineSelection(status); 555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, NULL, 0, status); // Break at start of data 
556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the Thai maiyamok character is a shorthand symbol that means "repeat the previous 557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // word". Instead of appearing as a word unto itself, however, it's kept together 558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // with the word before it 559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e44\\u0e1b\\u0e46", 0, status); 560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e21\\u0e32\\u0e46", 0, status); 561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e2b\\u0e27\\u0e48\\u0e32\\u0e07", 0, status); 562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e01\\u0e23\\u0e38\\u0e07", 0, status); 563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e17\\u0e1e", 0, status); 564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e41\\u0e25\\u0e30", 0, status); 565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e03\\u0e35", 0, status); 566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e22\\u0e07", 0, status); 567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(thaiLineSelection, "\\u0e43\\u0e2b\\u0e21\\u0e48", 0, status); 568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createLineInstance( 570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale("th"), status); 571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) 572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) 573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Failed to create the BreakIterator for Thai locale in TestMaiyamok. - %s", u_errorName(status)); 575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) generalIteratorTest(*e, thaiLineSelection); 578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete e; 579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestBug3818() { 584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Four Thai words... 
587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const UChar thaiWordData[] = { 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48, 588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48, 0 }; 589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString thaiStr(thaiWordData); 590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator* bi = 592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale("th"), status); 593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status) || bi == NULL) { 594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Fail at file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status)); 595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi->setText(thaiStr); 598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t startOfSecondWord = bi->following(1); 600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (startOfSecondWord != 4) { 601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Fail at file %s, line %d expected start of word at 4, got %d", 602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) __FILE__, __LINE__, startOfSecondWord); 603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) startOfSecondWord = bi->following(0); 605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (startOfSecondWord != 4) { 
606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Fail at file %s, line %d expected start of word at 4, got %d", 607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) __FILE__, __LINE__, startOfSecondWord); 608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestJapaneseWordBreak() { 614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TODO: Rewrite this test for a dictionary-based word breaking. 615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BITestData japaneseWordSelection(status); 618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(japaneseWordSelection, NULL, 0, status); // Break at start of data 620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(japaneseWordSelection, "\\u4ECA\\u65E5", 400, status); //2 621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(japaneseWordSelection, "\\u306F\\u3044\\u3044", 300, status); //5 622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(japaneseWordSelection, "\\u5929\\u6C17", 400, status); //7 623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(japaneseWordSelection, "\\u3067\\u3059\\u306D", 300, status); //10 624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(japaneseWordSelection, "\\u3002", 0, status); //11 
625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(japaneseWordSelection, "\\u000D\\u000A", 0, status); //12 626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createWordInstance( 628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale("ja"), status); 629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) 630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Failed to create the BreakIterator for Japanese locale in TestJapaneseWordBreak.\n"); 632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) generalIteratorTest(*e, japaneseWordSelection); 636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete e; 637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestTrieDict() { 641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Open and read the test data file. 
645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *testDataDirectory = IntlTest::getSourceTestData(status); 647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char testFileName[1000]; 648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testDataDirectory == NULL || strlen(testDataDirectory) + strlen("riwords.txt") + 10 >= sizeof(testFileName)) { 649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Can't open test data. Path too long."); 650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) strcpy(testFileName, testDataDirectory); 653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) strcat(testFileName, "riwords.txt"); 654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Items needing deleting at the end 656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) MutableTrieDictionary *mutableDict = NULL; 657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CompactTrieDictionary *compactDict = NULL; 658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *breaks = NULL; 659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *testFile = NULL; 660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) StringEnumeration *enumer1 = NULL; 661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) StringEnumeration *enumer2 = NULL; 662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) MutableTrieDictionary *mutable2 = NULL; 663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) StringEnumeration *cloneEnum = NULL; 664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CompactTrieDictionary *compact2 
= NULL; 665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *originalWord = NULL; 668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *cloneWord = NULL; 669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *current; 670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *word; 671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar uc; 672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t wordLen; 673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t wordCount; 674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t testCount; 675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int len; 677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testFile = ReadAndConvertFile(testFileName, len, NULL, status); 678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; /* something went wrong, error already output */ 680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mutableDict = new MutableTrieDictionary(0x0E1C, status); 683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Error creating MutableTrieDictionary: %s\n", u_errorName(status)); 685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) 688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breaks = new UnicodeSet; 689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breaks->add(0x000A); // Line Feed 690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breaks->add(0x000D); // Carriage Return 691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breaks->add(0x2028); // Line Separator 692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breaks->add(0x2029); // Paragraph Separator 693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Now add each non-comment line of the file as a word. 695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) current = testFile; 696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) word = current; 697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uc = *current++; 698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) wordLen = 0; 699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) wordCount = 0; 700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (uc) { 702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (uc == 0x0023) { // #comment line, skip 703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (uc && !breaks->contains(uc)) { 704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uc = *current++; 705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else while (uc && !breaks->contains(uc)) { 708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++wordLen; 709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uc = *current++; 710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) } 711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wordLen > 0) { 712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mutableDict->addWord(word, wordLen, status); 713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not add word to mutable dictionary; status %s\n", u_errorName(status)); 715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) wordCount += 1; 718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Find beginning of next line 721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (uc && breaks->contains(uc)) { 722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uc = *current++; 723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) word = current-1; 725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) wordLen = 0; 726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wordCount < 50) { 729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Word count (%d) unreasonably small\n", wordCount); 730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer1 = mutableDict->openWords(status); 
734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not open mutable dictionary enumerator: %s\n", u_errorName(status)); 736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testCount = 0; 740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wordCount != (testCount = enumer1->count(status))) { 741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("MutableTrieDictionary word count (%d) differs from file word count (%d), with status %s\n", 742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testCount, wordCount, u_errorName(status)); 743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Now compact it 747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compactDict = new CompactTrieDictionary(*mutableDict, status); 748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Failed to create CompactTrieDictionary: %s\n", u_errorName(status)); 750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer2 = compactDict->openWords(status); 754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 
755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not open compact trie dictionary enumerator: %s\n", u_errorName(status)); 756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wordCount != (testCount = enumer2->count(status))) { 760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("CompactTrieDictionary word count (%d) differs from file word count (%d), with status %s\n", 761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testCount, wordCount, u_errorName(status)); 762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (typeid(*enumer1) == typeid(*enumer2)) { 766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("CompactTrieEnumeration and MutableTrieEnumeration typeids are the same"); 767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete enumer1; 769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer1 = NULL; 770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete enumer2; 771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer2 = NULL; 772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Now un-compact it 774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mutable2 = compactDict->cloneMutable(status); 775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 
776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not clone CompactTrieDictionary to MutableTrieDictionary: %s\n", u_errorName(status)); 777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cloneEnum = mutable2->openWords(status); 781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not create cloned mutable enumerator: %s\n", u_errorName(status)); 783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wordCount != (testCount = cloneEnum->count(status))) { 787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Cloned MutableTrieDictionary word count (%d) differs from file word count (%d), with status %s\n", 788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testCount, wordCount, u_errorName(status)); 789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Compact original dictionary to clone. 
Note that we can only compare the same kind of 793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // dictionary as the order of the enumerators is not guaranteed to be the same between 794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // different kinds 795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer1 = mutableDict->openWords(status); 796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not re-open mutable dictionary enumerator: %s\n", u_errorName(status)); 798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) originalWord = enumer1->snext(status); 802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cloneWord = cloneEnum->snext(status); 803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (U_SUCCESS(status) && originalWord != NULL && cloneWord != NULL) { 804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (*originalWord != *cloneWord) { 805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Original and cloned MutableTrieDictionary word mismatch\n"); 806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) originalWord = enumer1->snext(status); 809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cloneWord = cloneEnum->snext(status); 810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 
813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Enumeration failed: %s\n", u_errorName(status)); 814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (originalWord != cloneWord) { 818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Original and cloned MutableTrieDictionary ended enumeration at different points\n"); 819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Test the data copying constructor for CompactTrieDict, and the data access APIs. 823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compact2 = new CompactTrieDictionary(compactDict->data(), status); 824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("CompactTrieDictionary(const void *,...) 
failed\n"); 826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (compact2->dataSize() == 0) { 830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("CompactTrieDictionary->dataSize() == 0\n"); 831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Now count the words via the second dictionary 835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete enumer1; 836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer1 = compact2->openWords(status); 837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not open compact trie dictionary 2 enumerator: %s\n", u_errorName(status)); 839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wordCount != (testCount = enumer1->count(status))) { 843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("CompactTrieDictionary 2 word count (%d) differs from file word count (%d), with status %s\n", 844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testCount, wordCount, u_errorName(status)); 845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) 848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)cleanup: 849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete compactDict; 850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete mutableDict; 851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete breaks; 852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete[] testFile; 853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete enumer1; 854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete mutable2; 855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete cloneEnum; 856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete compact2; 857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*TODO: delete later*/ 860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)inline void writeEnumerationToFile(StringEnumeration *enumer, char *filename){ 861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) FILE *outfile = fopen(filename,"w"); 863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UConverter *cvt = ucnv_open("UTF-8", &status); 864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) 865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(outfile != NULL){ 867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_ZERO_ERROR; 868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *word = enumer->snext(status); 869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (word != NULL && U_SUCCESS(status)) { 
870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char u8word[500]; 871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_ZERO_ERROR; 872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_fromUChars(cvt, u8word, 500, word->getBuffer(), word->length(), 873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &status); 874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(outfile,"%s\n", u8word); 875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_ZERO_ERROR; 876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) word = enumer->snext(status); 877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fclose(outfile); 879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_close(cvt); 881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// A very simple helper class to streamline the buffer handling in 884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TestTrieDictWithValue 885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)template<class T, size_t N> 886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class AutoBuffer { 887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) public: 888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) AutoBuffer(size_t size) : buffer(stackBuffer) { 889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (size > N) 890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buffer = new T[size]; 891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ~AutoBuffer() { 
893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (buffer != stackBuffer) 894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete [] buffer; 895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T* elems() { 897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return buffer; 898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const T& operator[] (size_t i) const { 900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return buffer[i]; 901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T& operator[] (size_t i) { 903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return buffer[i]; 904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) private: 906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T stackBuffer[N]; 907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) T* buffer; 908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) AutoBuffer(); 909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------- 912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TestTrieDictWithValue Test trie dictionaries with logprob values and 914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// more than 2^16 nodes after compaction. 
915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------- 917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestTrieDictWithValue() { 918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Open and read the test data file. 922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *testDataDirectory = IntlTest::getSourceTestData(status); 924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *filename = "cjdict-truncated.txt"; 925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char testFileName[1000]; 926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testDataDirectory == NULL || strlen(testDataDirectory) + strlen(filename) + 10 >= sizeof(testFileName)) { 927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Can't open test data. 
Path too long."); 928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) strcpy(testFileName, testDataDirectory); 931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) strcat(testFileName, filename); 932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Items needing deleting at the end 934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) MutableTrieDictionary *mutableDict = NULL; 935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CompactTrieDictionary *compactDict = NULL; 936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *breaks = NULL; 937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *testFile = NULL; 938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) StringEnumeration *enumer1 = NULL; 939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) StringEnumeration *enumer2 = NULL; 940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) MutableTrieDictionary *mutable2 = NULL; 941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) StringEnumeration *cloneEnum = NULL; 942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CompactTrieDictionary *compact2 = NULL; 943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NumberFormat *nf = NULL; 944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UText *originalText = NULL, *cloneText = NULL; 945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *originalWord = NULL; 947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *cloneWord = NULL; 948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *current; 
949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *word; 950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar uc; 951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t wordLen; 952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t wordCount; 953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t testCount; 954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t valueLen; 955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int counter = 0; 956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int len; 958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testFile = ReadAndConvertFile(testFileName, len, NULL, status); 959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; /* something went wrong, error already output */ 961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mutableDict = new MutableTrieDictionary(0x0E1C, status, TRUE); 964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Error creating MutableTrieDictionary: %s\n", u_errorName(status)); 966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breaks = new UnicodeSet; 970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breaks->add(0x000A); // Line Feed 971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) breaks->add(0x000D); // Carriage Return 972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breaks->add(0x2028); // Line Separator 973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breaks->add(0x2029); // Paragraph Separator 974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breaks->add(0x0009); // Tab character 975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Now add each non-comment line of the file as a word. 977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) current = testFile; 978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) word = current; 979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uc = *current++; 980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) wordLen = 0; 981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) wordCount = 0; 982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nf = NumberFormat::createInstance(status); 983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (uc) { 985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString ucharValue; 986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) valueLen = 0; 987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (uc == 0x0023) { // #comment line, skip 989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (uc && !breaks->contains(uc)) { 990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uc = *current++; 991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else{ 994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) while (uc && !breaks->contains(uc)) { 995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++wordLen; 996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uc = *current++; 997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(uc == 0x0009){ //separator is a tab char, read in num after tab 999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uc = *current++; 1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (uc && !breaks->contains(uc)) { 1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucharValue.append(uc); 1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uc = *current++; 1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wordLen > 0) { 1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Formattable value((int32_t)0); 1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nf->parse(ucharValue.getTerminatedBuffer(), value, status); 1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(status)){ 1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("parsing of value failed when reading in dictionary\n"); 1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mutableDict->addWord(word, wordLen, status, value.getLong()); 1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
errln("Could not add word to mutable dictionary; status %s\n", u_errorName(status)); 1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) wordCount += 1; 1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Find beginning of next line 1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (uc && breaks->contains(uc)) { 1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uc = *current++; 1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) word = current-1; 1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) wordLen = 0; 1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wordCount < 50) { 1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Word count (%d) unreasonably small\n", wordCount); 1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer1 = mutableDict->openWords(status); 1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not open mutable dictionary enumerator: %s\n", u_errorName(status)); 1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) } 1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testCount = 0; 1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wordCount != (testCount = enumer1->count(status))) { 1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("MutableTrieDictionary word count (%d) differs from file word count (%d), with status %s\n", 1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testCount, wordCount, u_errorName(status)); 1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Now compact it 1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compactDict = new CompactTrieDictionary(*mutableDict, status); 1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Failed to create CompactTrieDictionary: %s\n", u_errorName(status)); 1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer2 = compactDict->openWords(status); 1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not open compact trie dictionary enumerator: %s\n", u_errorName(status)); 1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) 1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //delete later 1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// writeEnumerationToFile(enumer1, "/home/jchye/mutable.txt"); 1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// writeEnumerationToFile(enumer2, "/home/jchye/compact.txt"); 1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer1->reset(status); 1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer2->reset(status); 1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) originalWord = enumer1->snext(status); 1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cloneWord = enumer2->snext(status); 1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (U_SUCCESS(status) && originalWord != NULL && cloneWord != NULL) { 1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (*originalWord != *cloneWord) { 1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("MutableTrieDictionary and CompactTrieDictionary word mismatch at %d, lengths are %d and %d\n", 1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) counter, originalWord->length(), cloneWord->length()); 1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // check if attached values of the same word in both dictionaries tally 1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lengths1[originalWord->length()], 
lengths2[cloneWord->length()]; 1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint16_t values1[originalWord->length()], values2[cloneWord->length()]; 1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) AutoBuffer<int32_t, 20> lengths1(originalWord->length()); 1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) AutoBuffer<int32_t, 20> lengths2(cloneWord->length()); 1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) AutoBuffer<uint16_t, 20> values1(originalWord->length()); 1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) AutoBuffer<uint16_t, 20> values2(cloneWord->length()); 1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) originalText = utext_openConstUnicodeString(originalText, originalWord, &status); 1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cloneText = utext_openConstUnicodeString(cloneText, cloneWord, &status); 1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int count1, count2; 1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mutableDict->matches(originalText, originalWord->length(), lengths1.elems(), count1, originalWord->length(), values1.elems()); 1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compactDict->matches(cloneText, cloneWord->length(), lengths2.elems(), count2, cloneWord->length(), values2.elems()); 1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(values1[count1-1] != values2[count2-1]){ 1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Values of word %d in MutableTrieDictionary and CompactTrieDictionary do not match, with values %d and %d\n", 
1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) counter, values1[count1-1], values2[count2-1]); 1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) counter++; 1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) originalWord = enumer1->snext(status); 1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cloneWord = enumer2->snext(status); 1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (enumer1->getDynamicClassID() == enumer2->getDynamicClassID()) { 1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("CompactTrieEnumeration and MutableTrieEnumeration ClassIDs are the same"); 1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete enumer1; 1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer1 = NULL; 1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete enumer2; 1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer2 = NULL; 1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Now un-compact it 1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mutable2 = compactDict->cloneMutable(status); 1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not clone CompactTrieDictionary to MutableTrieDictionary: %s\n", u_errorName(status)); 1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) goto cleanup; 1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cloneEnum = mutable2->openWords(status); 1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not create cloned mutable enumerator: %s\n", u_errorName(status)); 1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wordCount != (testCount = cloneEnum->count(status))) { 1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Cloned MutableTrieDictionary word count (%d) differs from file word count (%d), with status %s\n", 1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testCount, wordCount, u_errorName(status)); 1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Compact original dictionary to clone. 
Note that we can only compare the same kind of 1134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // dictionary as the order of the enumerators is not guaranteed to be the same between 1135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // different kinds 1136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer1 = mutableDict->openWords(status); 1137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not re-open mutable dictionary enumerator: %s\n", u_errorName(status)); 1139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) counter = 0; 1143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) originalWord = enumer1->snext(status); 1144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cloneWord = cloneEnum->snext(status); 1145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (U_SUCCESS(status) && originalWord != NULL && cloneWord != NULL) { 1146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (*originalWord != *cloneWord) { 1147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Original and cloned MutableTrieDictionary word mismatch\n"); 1148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // check if attached values of the same word in both dictionaries tally 1152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) AutoBuffer<int32_t, 20> lengths1(originalWord->length()); 
1153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) AutoBuffer<int32_t, 20> lengths2(cloneWord->length()); 1154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) AutoBuffer<uint16_t, 20> values1(originalWord->length()); 1155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) AutoBuffer<uint16_t, 20> values2(cloneWord->length()); 1156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) originalText = utext_openConstUnicodeString(originalText, originalWord, &status); 1157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cloneText = utext_openConstUnicodeString(cloneText, cloneWord, &status); 1158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int count1, count2; 1160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mutableDict->matches(originalText, originalWord->length(), lengths1.elems(), count1, originalWord->length(), values1.elems()); 1161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mutable2->matches(cloneText, cloneWord->length(), lengths2.elems(), count2, cloneWord->length(), values2.elems()); 1162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(values1[count1-1] != values2[count2-1]){ 1164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Values of word %d in original and cloned MutableTrieDictionary do not match, with values %d and %d\n", 1165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) counter, values1[count1-1], values2[count2-1]); 1166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) counter++; 1170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) 1171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) originalWord = enumer1->snext(status); 1172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cloneWord = cloneEnum->snext(status); 1173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Enumeration failed: %s\n", u_errorName(status)); 1177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (originalWord != cloneWord) { 1181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Original and cloned MutableTrieDictionary ended enumeration at different points\n"); 1182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Test the data copying constructor for CompactTrieDict, and the data access APIs. 1186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compact2 = new CompactTrieDictionary(compactDict->data(), status); 1187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("CompactTrieDictionary(const void *,...) 
failed\n"); 1189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (compact2->dataSize() == 0) { 1193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("CompactTrieDictionary->dataSize() == 0\n"); 1194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Now count the words via the second dictionary 1198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete enumer1; 1199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumer1 = compact2->openWords(status); 1200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Could not open compact trie dictionary 2 enumerator: %s\n", u_errorName(status)); 1202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wordCount != (testCount = enumer1->count(status))) { 1206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("CompactTrieDictionary 2 word count (%d) differs from file word count (%d), with status %s\n", 1207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testCount, wordCount, u_errorName(status)); 1208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 1209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
1210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cleanup: 1212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete compactDict; 1213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete mutableDict; 1214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete breaks; 1215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete[] testFile; 1216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete enumer1; 1217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete mutable2; 1218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete cloneEnum; 1219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete compact2; 1220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_close(originalText); 1221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_close(cloneText); 1222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------- 1227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// generalIteratorTest Given a break iterator and a set of test data, 1229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Run the tests and report the results. 
1230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------- 1232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::generalIteratorTest(RuleBasedBreakIterator& bi, BITestData &td) 1233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi.setText(td.fDataToBreak); 1236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testFirstAndNext(bi, td); 1238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testLastAndPrevious(bi, td); 1240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testFollowing(bi, td); 1242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testPreceding(bi, td); 1243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testIsBoundary(bi, td); 1244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) doMultipleSelectionTest(bi, td); 1245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// testFirstAndNext. Run the iterator forwards in the obvious first(), next() 1250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// kind of loop. 
1251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::testFirstAndNext(RuleBasedBreakIterator& bi, BITestData &td) 1253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t p; 1256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lastP = -1; 1257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t tag; 1258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Test first and next"); 1260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi.setText(td.fDataToBreak); 1261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.clearResults(); 1262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (p=bi.first(); p!=RuleBasedBreakIterator::DONE; p=bi.next()) { 1264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualBreakPositions.addElement(p, status); // Save result. 1265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tag = bi.getRuleStatus(); 1266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualTags.addElement(tag, status); 1267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p <= lastP) { 1268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If the iterator is not making forward progress, stop. 1269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No need to raise an error here, it'll be detected in the normal check of results. 
1270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lastP = p; 1273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.checkResults("testFirstAndNext", this); 1275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TestLastAndPrevious. Run the iterator backwards, starting with last(). 1280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::testLastAndPrevious(RuleBasedBreakIterator& bi, BITestData &td) 1282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t p; 1285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lastP = 0x7ffffffe; 1286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t tag; 1287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Test last and previous"); 1289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi.setText(td.fDataToBreak); 1290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.clearResults(); 1291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (p=bi.last(); p!=RuleBasedBreakIterator::DONE; p=bi.previous()) { 
1293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Save break position. Insert it at start of vector of results, shoving 1294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // already-saved results further towards the end. 1295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualBreakPositions.insertElementAt(p, 0, status); 1296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // bi.previous(); // TODO: Why does this fix things up???? 1297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // bi.next(); 1298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tag = bi.getRuleStatus(); 1299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualTags.insertElementAt(tag, 0, status); 1300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p >= lastP) { 1301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If the iterator is not making progress, stop. 1302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No need to raise an error here, it'll be detected in the normal check of results. 
1303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lastP = p; 1306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.checkResults("testLastAndPrevious", this); 1308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::testFollowing(RuleBasedBreakIterator& bi, BITestData &td) 1312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t p; 1315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t tag; 1316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lastP = -2; // A value that will never be returned as a break position. 1317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // cannot be -1; that is returned for DONE. 1318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 1319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("testFollowing():"); 1321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi.setText(td.fDataToBreak); 1322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.clearResults(); 1323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Save the starting point, since we won't get that out of following. 
1325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p = bi.first(); 1326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualBreakPositions.addElement(p, status); // Save result. 1327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tag = bi.getRuleStatus(); 1328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualTags.addElement(tag, status); 1329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i <= td.fDataToBreak.length()+1; i++) { 1331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p = bi.following(i); 1332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p != lastP) { 1333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p == RuleBasedBreakIterator::DONE) { 1334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We've reached a new break position. Save it. 1337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualBreakPositions.addElement(p, status); // Save result. 1338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tag = bi.getRuleStatus(); 1339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualTags.addElement(tag, status); 1340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lastP = p; 1341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The loop normally exits by means of the break in the middle. 
1344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Make sure that the index was at the correct position for the break iterator to have 1345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // returned DONE. 1346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (i != td.fDataToBreak.length()) { 1347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("testFollowing(): iterator returned DONE prematurely."); 1348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Full check of all results. 1351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.checkResults("testFollowing", this); 1352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::testPreceding(RuleBasedBreakIterator& bi, BITestData &td) { 1357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t p; 1359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t tag; 1360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lastP = 0x7ffffffe; 1361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 1362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("testPreceding():"); 1364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi.setText(td.fDataToBreak); 1365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.clearResults(); 
1366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p = bi.last(); 1368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualBreakPositions.addElement(p, status); 1369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tag = bi.getRuleStatus(); 1370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualTags.addElement(tag, status); 1371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = td.fDataToBreak.length(); i>=-1; i--) { 1373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p = bi.preceding(i); 1374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p != lastP) { 1375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p == RuleBasedBreakIterator::DONE) { 1376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We've reached a new break position. Save it. 1379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualBreakPositions.insertElementAt(p, 0, status); 1380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lastP = p; 1381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tag = bi.getRuleStatus(); 1382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualTags.insertElementAt(tag, 0, status); 1383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The loop normally exits by means of the break in the middle. 
1386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Make sure that the index was at the correct position for the break iterator to have 1387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // returned DONE. 1388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (i != 0) { 1389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("testPreceding(): iterator returned DONE prematurely."); 1390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Full check of all results. 1393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.checkResults("testPreceding", this); 1394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::testIsBoundary(RuleBasedBreakIterator& bi, BITestData &td) { 1399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 1401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t tag; 1402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("testIsBoundary():"); 1404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi.setText(td.fDataToBreak); 1405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.clearResults(); 1406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i <= td.fDataToBreak.length(); i++) { 
1408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (bi.isBoundary(i)) { 1409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualBreakPositions.addElement(i, status); // Save result. 1410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tag = bi.getRuleStatus(); 1411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.fActualTags.addElement(tag, status); 1412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) td.checkResults("testIsBoundary: ", this); 1415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::doMultipleSelectionTest(RuleBasedBreakIterator& iterator, BITestData &td) 1420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) iterator.setText(td.fDataToBreak); 1422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator* testIterator =(RuleBasedBreakIterator*)iterator.clone(); 1424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t offset = iterator.first(); 1425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t testOffset; 1426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t count = 0; 1427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("doMultipleSelectionTest text of length: %d", td.fDataToBreak.length()); 1429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) 1430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (*testIterator != iterator) 1431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("clone() or operator!= failed: two clones compared unequal"); 1432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 1434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testOffset = testIterator->first(); 1435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testOffset = testIterator->next(count); 1436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset != testOffset) 1437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset); 1438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset != RuleBasedBreakIterator::DONE) { 1440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count++; 1441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offset = iterator.next(); 1442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset != RuleBasedBreakIterator::DONE && *testIterator == iterator) { 1444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("operator== failed: Two unequal iterators compared equal. count=%d offset=%d", count, offset); 1445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (count > 10000 || offset == -1) { 1446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("operator== failed too many times. 
Stopping test."); 1447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset == -1) { 1448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Does (RuleBasedBreakIterator::DONE == -1)?"); 1449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while (offset != RuleBasedBreakIterator::DONE); 1455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // now do it backwards... 1457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offset = iterator.last(); 1458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count = 0; 1459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 1461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testOffset = testIterator->last(); 1462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testOffset = testIterator->next(count); // next() with a negative arg is same as previous 1463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset != testOffset) 1464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset); 1465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset != RuleBasedBreakIterator::DONE) { 1467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count--; 
1468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offset = iterator.previous(); 1469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while (offset != RuleBasedBreakIterator::DONE); 1471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete testIterator; 1473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------- 1477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// other tests 1479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------- 1481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestEmptyString() 1482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString text = ""; 1484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BITestData x(status); 1487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ADD_DATACHUNK(x, "", 0, status); // Break at start of data 1488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator* bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); 1489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) 
1490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 1491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Failed to create the BreakIterator for default locale in TestEmptyString. - %s", u_errorName(status)); 1492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) generalIteratorTest(*bi, x); 1495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 1496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestGetAvailableLocales() 1499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t locCount = 0; 1501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const Locale* locList = BreakIterator::getAvailableLocales(locCount); 1502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (locCount == 0) 1504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataerrln("getAvailableLocales() returned an empty list!"); 1505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Just make sure that it's returning good memory. 
1506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i; 1507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i < locCount; ++i) { 1508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln(locList[i].getName()); 1509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//Testing the BreakIterator::getDisplayName() function 1513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestGetDisplayName() 1514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString result; 1516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator::getDisplayName(Locale::getUS(), result); 1518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (Locale::getDefault() == Locale::getUS() && result != "English (United States)") 1519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataerrln("BreakIterator::getDisplayName() failed: expected \"English (United States)\", got \"" 1520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) + result); 1521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator::getDisplayName(Locale::getFrance(), Locale::getUS(), result); 1523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (result != "French (France)") 1524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataerrln("BreakIterator::getDisplayName() failed: expected \"French (France)\", got \"" 1525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) + result); 
1526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 1528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Test End Behaviour 1529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @bug 4068137 1530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestEndBehaviour() 1532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString testString("boo."); 1535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *wb = BreakIterator::createWordInstance(Locale::getDefault(), status); 1536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) 1537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 1538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Failed to create the BreakIterator for default locale in TestEndBehaviour. 
- %s", u_errorName(status)); 1539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) wb->setText(testString); 1542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wb->first() != 0) 1544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Didn't get break at beginning of string."); 1545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wb->next() != 3) 1546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Didn't get break before period in \"boo.\""); 1547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (wb->current() != 4 && wb->next() != 4) 1548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Didn't get break at end of string."); 1549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete wb; 1550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 1552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @bug 4153072 1553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestBug4153072() { 1555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *iter = BreakIterator::createWordInstance(Locale::getDefault(), status); 1557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) 1558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 1559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Failed to create the BreakIterator for default locale in TestBug4153072 - %s", 
u_errorName(status)); 1560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString str("...Hello, World!..."); 1563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t begin = 3; 1564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t end = str.length() - 3; 1565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool onBoundary; 1566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) StringCharacterIterator* textIterator = new StringCharacterIterator(str, begin, end, begin); 1568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) iter->adoptText(textIterator); 1569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int index; 1570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Note: with the switch to UText, there is no way to restrict the 1571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // iteration range to begin at an index other than zero. 1572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // String character iterators created with a non-zero bound are 1573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // treated by RBBI as being empty. 1574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (index = -1; index < begin + 1; ++index) { 1575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) onBoundary = iter->isBoundary(index); 1576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (index == 0? 
!onBoundary : onBoundary) { 1577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln((UnicodeString)"Didn't handle isBoundary correctly with offset = " + index + 1578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) " and begin index = " + begin); 1579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete iter; 1582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Test for problem reported by Ashok Matoria on 9 July 2007 1587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// One.<kSoftHyphen><kSpace>Two. 1588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Sentence break at start (0) and then on calling next() it breaks at 1590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 'T' of "Two". Now, at this point if I do next() and 1591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// then previous(), it breaks at <kSOftHyphen> instead of 'T' of "Two". 
1592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestBug5775() { 1594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *bi = BreakIterator::createSentenceInstance(Locale::getEnglish(), status); 1596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 1597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Check for status first for better handling of no data errors. 1601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT(bi != NULL); 1602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (bi == NULL) { 1603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString s("One.\\u00ad Two.", -1, US_INV); 1607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 01234 56789 1608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s = s.unescape(); 1609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi->setText(s); 1610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int pos = bi->next(); 1611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT(pos == 6); 1612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = bi->next(); 1613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT(pos == 10); 
1614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = bi->previous(); 1615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT(pos == 6); 1616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 1617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 1622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Test Japanese Line Break 1623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @bug 4095322 1624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestJapaneseLineBreak() 1626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 1628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Test needs updating some more... Dump it for now. 1629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Change for Unicode TR 14: Punctuation characters with categories Pi and Pf do not count 1632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // as opening and closing punctuation for line breaking. 1633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Also, \u30fc and \u30fe are not counted as hyphens. Remove these chars 1634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // from these tests. 
6-13-2002 1635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString testString = CharsToUnicodeString("\\u4e00x\\u4e8c"); 1638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString precedingChars = CharsToUnicodeString( 1639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //"([{\\u00ab$\\u00a5\\u00a3\\u00a4\\u2018\\u201a\\u201c\\u201e\\u201b\\u201f"); 1640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "([{$\\u00a5\\u00a3\\u00a4\\u201a\\u201e"); 1641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString followingChars = CharsToUnicodeString( 1642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // ")]}\\u00bb!%,.\\u3001\\u3002\\u3063\\u3083\\u3085\\u3087\\u30c3\\u30e3\\u30e5\\u30e7\\u30fc" 1643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ")]}!%,.\\u3001\\u3002\\u3063\\u3083\\u3085\\u3087\\u30c3\\u30e3\\u30e5\\u30e7" 1644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // ":;\\u309b\\u309c\\u3005\\u309d\\u309e\\u30fd\\u30fe\\u2019\\u201d\\u00b0\\u2032\\u2033\\u2034" 1645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ":;\\u309b\\u309c\\u3005\\u309d\\u309e\\u30fd\\u00b0\\u2032\\u2033\\u2034" 1646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2030\\u2031\\u2103\\u2109\\u00a2\\u0300\\u0301\\u0302"); 1647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *iter = BreakIterator::createLineInstance(Locale::getJapan(), status); 1648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i; 1650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) 1651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
{ 1652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Failed to create the BreakIterator for Japanese locale in TestJapaneseLineBreak.\n"); 1653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i < precedingChars.length(); i++) { 1657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testString.setCharAt(1, precedingChars[i]); 1658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) iter->setText(testString); 1659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t j = iter->first(); 1660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (j != 0) 1661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("ja line break failure: failed to start at 0"); 1662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j = iter->next(); 1663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (j != 1) 1664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("ja line break failure: failed to stop before '" + UCharToUnicodeString(precedingChars[i]) 1665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) + "' (" + ((int)(precedingChars[i])) + ")"); 1666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j = iter->next(); 1667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (j != 3) 1668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("ja line break failure: failed to skip position after '" + UCharToUnicodeString(precedingChars[i]) 1669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) + "' (" + ((int)(precedingChars[i])) + ")"); 1670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
1672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i < followingChars.length(); i++) { 1673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testString.setCharAt(1, followingChars[i]); 1674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) iter->setText(testString); 1675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int j = iter->first(); 1676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (j != 0) 1677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("ja line break failure: failed to start at 0"); 1678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j = iter->next(); 1679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (j != 2) 1680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("ja line break failure: failed to skip position before '" + UCharToUnicodeString(followingChars[i]) 1681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) + "' (" + ((int)(followingChars[i])) + ")"); 1682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j = iter->next(); 1683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (j != 3) 1684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("ja line break failure: failed to stop after '" + UCharToUnicodeString(followingChars[i]) 1685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) + "' (" + ((int)(followingChars[i])) + ")"); 1686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete iter; 1688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
//------------------------------------------------------------------------------
//
//   RBBITest::Extended    Run  RBBI Tests from an external test data file
//
//------------------------------------------------------------------------------

// Inputs for one run of RBBITest::executeTest().
struct TestParams {
    BreakIterator   *bi;              // Iterator under test; executeTest() returns at once if NULL.
    UnicodeString    dataToBreak;     // Text handed to bi->setText().
    UVector32       *expectedBreaks;  // Indexed by text position.  0: no break expected there;
                                      //   -1: break expected, with rule status 0;
                                      //   any other value: break expected with that rule status.
    UVector32       *srcLine;         // Indexed by text position; reported as "File line" in failures.
    UVector32       *srcCol;          // Indexed by text position; reported as "col" in failures.
};

void RBBITest::executeTest(TestParams *t) {
    int32_t    bp;
    int32_t    prevBP;
    int32_t    i;

    if (t->bi == NULL) {
        return;
    }

Coles) t->bi->setText(t->dataToBreak); 1716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Run the iterator forward 1718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBP = -1; 1720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) { 1721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (prevBP == bp) { 1722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Fail for lack of forward progress. 1723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Forward Iteration, no forward progress. Break Pos=%4d File line,col=%4d,%4d", 1724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 1725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check that there were we didn't miss an expected break between the last one 1729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // and this one. 1730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=prevBP+1; i<bp; i++) { 1731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->expectedBreaks->elementAti(i) != 0) { 1732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expected[] = {0, i}; 1733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printStringBreaks(t->dataToBreak, expected, 2); 1734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Forward Iteration, break expected, but not found. 
Pos=%4d File line,col= %4d,%4d", 1735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 1736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check that the break we did find was expected 1740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->expectedBreaks->elementAti(bp) == 0) { 1741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expected[] = {0, bp}; 1742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printStringBreaks(t->dataToBreak, expected, 2); 1743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Forward Iteration, break found, but not expected. Pos=%4d File line,col= %4d,%4d", 1744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 1745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The break was expected. 1747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check that the {nnn} tag value is correct. 
1748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t expectedTagVal = t->expectedBreaks->elementAti(bp); 1749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expectedTagVal == -1) { 1750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectedTagVal = 0; 1751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t line = t->srcLine->elementAti(bp); 1753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); 1754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (rs != expectedTagVal) { 1755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Incorrect status for forward break. Pos=%4d File line,col= %4d,%4d.\n" 1756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) " Actual, Expected status = %4d, %4d", 1757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal); 1758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBP = bp; 1763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Verify that there were no missed expected breaks after the last one found 1766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=prevBP+1; i<t->expectedBreaks->size(); i++) { 1767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->expectedBreaks->elementAti(i) != 0) { 1768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) errln("Forward Iteration, break expected, but not found. Pos=%4d File line,col= %4d,%4d", 1769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 1770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Run the iterator backwards, verify that the same breaks are found. 1775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBP = t->dataToBreak.length()+2; // start with a phony value for the last break pos seen. 1777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous()) { 1778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (prevBP == bp) { 1779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Fail for lack of progress. 1780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Reverse Iteration, no progress. Break Pos=%4d File line,col=%4d,%4d", 1781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 1782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check that there were we didn't miss an expected break between the last one 1786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // and this one. (UVector returns zeros for index out of bounds.) 
1787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=prevBP-1; i>bp; i--) { 1788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->expectedBreaks->elementAti(i) != 0) { 1789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Reverse Itertion, break expected, but not found. Pos=%4d File line,col= %4d,%4d", 1790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 1791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check that the break we did find was expected 1795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->expectedBreaks->elementAti(bp) == 0) { 1796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Reverse Itertion, break found, but not expected. Pos=%4d File line,col= %4d,%4d", 1797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 1798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The break was expected. 1800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check that the {nnn} tag value is correct. 
1801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t expectedTagVal = t->expectedBreaks->elementAti(bp); 1802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expectedTagVal == -1) { 1803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectedTagVal = 0; 1804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int line = t->srcLine->elementAti(bp); 1806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); 1807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (rs != expectedTagVal) { 1808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Incorrect status for reverse break. Pos=%4d File line,col= %4d,%4d.\n" 1809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) " Actual, Expected status = %4d, %4d", 1810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal); 1811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBP = bp; 1815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Verify that there were no missed breaks prior to the last one found 1818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=prevBP-1; i>=0; i--) { 1819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->expectedBreaks->elementAti(i) != 0) { 1820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Forward Itertion, break expected, but not found. 
Pos=%4d File line,col= %4d,%4d", 1821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 1822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestExtended() { 1828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS 1829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale locale(""); 1831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString rules; 1833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TestParams tp; 1834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.bi = NULL; 1835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.expectedBreaks = new UVector32(status); 1836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine = new UVector32(status); 1837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol = new UVector32(status); 1838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_]*) *>"), 0, status); 1840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status)); 
1842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Open and read the test data file. 1847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *testDataDirectory = IntlTest::getSourceTestData(status); 1849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char testFileName[1000]; 1850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) { 1851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Can't open test data. Path too long."); 1852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) strcpy(testFileName, testDataDirectory); 1855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) strcat(testFileName, "rbbitst.txt"); 1856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int len; 1858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status); 1859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; /* something went wrong, error already output */ 1861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
1864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Put the test data into a UnicodeString 1868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString testString(FALSE, testFile, len); 1870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enum EParseState{ 1872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) PARSE_COMMENT, 1873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) PARSE_TAG, 1874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) PARSE_DATA, 1875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) PARSE_NUM 1876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) parseState = PARSE_TAG; 1878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) EParseState savedState = PARSE_TAG; 1880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const UChar CH_LF = 0x0a; 1882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const UChar CH_CR = 0x0d; 1883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const UChar CH_HASH = 0x23; 1884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*static const UChar CH_PERIOD = 0x2e;*/ 1885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const UChar CH_LT = 0x3c; 1886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const UChar CH_GT = 0x3e; 1887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) static const UChar CH_BACKSLASH = 0x5c; 1888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const UChar CH_BULLET = 0x2022; 1889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lineNum = 1; 1891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t colStart = 0; 1892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t column = 0; 1893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t charIdx = 0; 1894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t tagValue = 0; // The numeric value of a <nnn> tag. 1896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (charIdx = 0; charIdx < len; ) { 1898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_ZERO_ERROR; 1899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar c = testString.charAt(charIdx); 1900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx++; 1901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == CH_CR && charIdx<len && testString.charAt(charIdx) == CH_LF) { 1902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // treat CRLF as a unit 1903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = CH_LF; 1904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx++; 1905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == CH_LF || c == CH_CR) { 1907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lineNum++; 1908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) colStart = charIdx; 1909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
1910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) column = charIdx - colStart + 1; 1911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch (parseState) { 1913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case PARSE_COMMENT: 1914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == 0x0a || c == 0x0d) { 1915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) parseState = savedState; 1916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case PARSE_TAG: 1920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 1921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == CH_HASH) { 1922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) parseState = PARSE_COMMENT; 1923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) savedState = PARSE_TAG; 1924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (u_isUWhiteSpace(c)) { 1927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testString.compare(charIdx-1, 6, "<word>") == 0) { 1930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete tp.bi; 1931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.bi = BreakIterator::createWordInstance(locale, status); 1932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx += 5; 1933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
break; 1934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testString.compare(charIdx-1, 6, "<char>") == 0) { 1936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete tp.bi; 1937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.bi = BreakIterator::createCharacterInstance(locale, status); 1938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx += 5; 1939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testString.compare(charIdx-1, 6, "<line>") == 0) { 1942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete tp.bi; 1943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.bi = BreakIterator::createLineInstance(locale, status); 1944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx += 5; 1945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testString.compare(charIdx-1, 6, "<sent>") == 0) { 1948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete tp.bi; 1949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.bi = NULL; 1950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.bi = BreakIterator::createSentenceInstance(locale, status); 1951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx += 5; 1952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testString.compare(charIdx-1, 7, "<title>") == 0) { 1955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete 
tp.bi; 1956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.bi = BreakIterator::createTitleInstance(locale, status); 1957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx += 6; 1958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // <locale loc_name> 1962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) localeMatcher.reset(testString); 1963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (localeMatcher.lookingAt(charIdx-1, status)) { 1964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString localeName = localeMatcher.group(1, status); 1965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char localeName8[100]; 1966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) localeName.extract(0, localeName.length(), localeName8, sizeof(localeName8), 0); 1967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) locale = Locale::createFromName(localeName8); 1968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx += localeMatcher.group(0, status).length(); 1969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 1970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testString.compare(charIdx-1, 6, "<data>") == 0) { 1973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) parseState = PARSE_DATA; 1974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx += 5; 1975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.dataToBreak = ""; 1976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
tp.expectedBreaks->removeAllElements(); 1977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol ->removeAllElements(); 1978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine->removeAllElements(); 1979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("line %d: Tag expected in test file.", lineNum); 1983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) parseState = PARSE_COMMENT; 1984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) savedState = PARSE_DATA; 1985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto end_test; // Stop the test. 1986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case PARSE_DATA: 1990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == CH_BULLET) { 1991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t breakIdx = tp.dataToBreak.length(); 1992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.expectedBreaks->setSize(breakIdx+1); 1993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.expectedBreaks->setElementAt(-1, breakIdx); 1994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine->setSize(breakIdx+1); 1995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine->setElementAt(lineNum, breakIdx); 1996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol ->setSize(breakIdx+1); 1997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol ->setElementAt(column, breakIdx); 
1998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testString.compare(charIdx-1, 7, "</data>") == 0) { 2002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Add final entry to mappings from break location to source file position. 2003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Need one extra because last break position returned is after the 2004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // last char in the data, not at the last char. 2005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine->addElement(lineNum, status); 2006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol ->addElement(column, status); 2007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) parseState = PARSE_TAG; 2009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx += 6; 2010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // RUN THE TEST! 2012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) executeTest(&tp); 2013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) { 2017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Named character, e.g. 
\N{COMBINING GRAVE ACCENT} 2018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Get the code point from the name and insert it into the test data. 2019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (Damn, no API takes names in Unicode !!! 2020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we've got to take it back to char *) 2021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, charIdx); 2022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t nameLength = nameEndIdx - (charIdx+2); 2023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char charNameBuf[200]; 2024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 theChar = -1; 2025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (nameEndIdx != -1) { 2026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 2027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testString.extract(charIdx+2, nameLength, charNameBuf, sizeof(charNameBuf)); 2028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charNameBuf[sizeof(charNameBuf)-1] = 0; 2029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) theChar = u_charFromName(U_UNICODE_CHAR_NAME, charNameBuf, &status); 2030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 2031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) theChar = -1; 2032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (theChar == -1) { 2035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Error in named character in test file at line %d, col %d", 2036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lineNum, column); 
2037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 2038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Named code point was recognized. Insert it 2039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // into the test data. 2040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.dataToBreak.append(theChar); 2041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tp.dataToBreak.length() > tp.srcLine->size()) { 2042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine->addElement(lineNum, status); 2043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol ->addElement(column, status); 2044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (nameEndIdx > charIdx) { 2047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx = nameEndIdx+1; 2048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testString.compare(charIdx-1, 2, "<>") == 0) { 2057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx++; 2058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t breakIdx = tp.dataToBreak.length(); 2059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.expectedBreaks->setSize(breakIdx+1); 
2060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.expectedBreaks->setElementAt(-1, breakIdx); 2061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine->setSize(breakIdx+1); 2062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine->setElementAt(lineNum, breakIdx); 2063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol ->setSize(breakIdx+1); 2064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol ->setElementAt(column, breakIdx); 2065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == CH_LT) { 2069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tagValue = 0; 2070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) parseState = PARSE_NUM; 2071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == CH_HASH && column==3) { // TODO: why is column off so far? 2075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) parseState = PARSE_COMMENT; 2076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) savedState = PARSE_DATA; 2077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == CH_BACKSLASH) { 2081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check for \ at end of line, a line continuation. 
2082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Advance over (discard) the newline 2083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 cp = testString.char32At(charIdx); 2084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (cp == CH_CR && charIdx<len && testString.charAt(charIdx+1) == CH_LF) { 2085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We have a CR LF 2086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Need an extra increment of the input ptr to move over both of them 2087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx++; 2088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (cp == CH_LF || cp == CH_CR) { 2090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lineNum++; 2091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) colStart = charIdx; 2092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx++; 2093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Let unescape handle the back slash. 2097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cp = testString.unescapeAt(charIdx); 2098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (cp != -1) { 2099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Escape sequence was recognized. Insert the char 2100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // into the test data. 
2101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.dataToBreak.append(cp); 2102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tp.dataToBreak.length() > tp.srcLine->size()) { 2103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine->addElement(lineNum, status); 2104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol ->addElement(column, status); 2105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Not a recognized backslash escape sequence. 2111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Take the next char as a literal. 2112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: Should this be an error? 2113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = testString.charAt(charIdx); 2114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charIdx = testString.moveIndex32(charIdx, 1); 2115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Normal, non-escaped data char. 2118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.dataToBreak.append(c); 2119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Save the mapping from offset in the data to line/column numbers in 2121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the original input file. Will be used for better error messages only. 
2122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If there's an expected break before this char, the slot in the mapping 2123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // vector will already be set for this char; don't overwrite it. 2124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (tp.dataToBreak.length() > tp.srcLine->size()) { 2125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine->addElement(lineNum, status); 2126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol ->addElement(column, status); 2127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case PARSE_NUM: 2132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We are parsing an expected numeric tag value, like <1234>, 2133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // within a chunk of data. 2134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (u_isUWhiteSpace(c)) { 2135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == CH_GT) { 2139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Finished the number. Add the info to the expected break data, 2140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // and switch parse state back to doing plain data. 
2141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) parseState = PARSE_DATA; 2142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (tagValue == 0) { 2143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tagValue = -1; 2144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t breakIdx = tp.dataToBreak.length(); 2146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.expectedBreaks->setSize(breakIdx+1); 2147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.expectedBreaks->setElementAt(tagValue, breakIdx); 2148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine->setSize(breakIdx+1); 2149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcLine->setElementAt(lineNum, breakIdx); 2150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol ->setSize(breakIdx+1); 2151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tp.srcCol ->setElementAt(column, breakIdx); 2152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (u_isdigit(c)) { 2156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tagValue = tagValue*10 + u_charDigitValue(c); 2157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Syntax Error in test file at line %d, col %d", 2161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lineNum, column); 2162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) parseState = PARSE_COMMENT; 
2163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto end_test; // Stop the test 2164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 2169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("ICU Error %s while parsing test file at line %d.", 2170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_errorName(status), lineNum); 2171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_ZERO_ERROR; 2172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto end_test; // Stop the test 2173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)end_test: 2178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete tp.bi; 2179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete tp.expectedBreaks; 2180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete tp.srcLine; 2181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete tp.srcCol; 2182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete [] testFile; 2183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 2184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestThaiBreaks() { 2187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) UErrorCode status=U_ZERO_ERROR; 2188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator* b; 2189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale locale = Locale("th"); 2190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t p, index; 2191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar c[]= { 2192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B, 2193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19, 2194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x0E16, 0x0E49, 0x0E33, 0x0000 2195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 2196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t expectedWordResult[] = { 2197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2, 3, 6, 10, 11, 15, 17, 20, 22 2198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 2199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t expectedLineResult[] = { 2200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3, 6, 11, 15, 17, 20, 22 2201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 2202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t size = u_strlen(c); 2204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString text=UnicodeString(c); 2205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) b = BreakIterator::createWordInstance(locale, status); 2207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 2208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
errcheckln(status, "Unable to create thai word break iterator. - %s", u_errorName(status)); 2209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 2210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) b->setText(text); 2212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p = index = 0; 2213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while ((p=b->next())!=BreakIterator::DONE && p < size) { 2214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p != expectedWordResult[index++]) { 2215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Incorrect break given by thai word break iterator. Expected: %d Got: %d", expectedWordResult[index-1], p); 2216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete b; 2219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) b = BreakIterator::createLineInstance(locale, status); 2221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 2222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("Unable to create thai line break iterator.\n"); 2223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 2224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) b->setText(text); 2226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p = index = 0; 2227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while ((p=b->next())!=BreakIterator::DONE && p < size) { 2228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p != expectedLineResult[index++]) { 
2229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Incorrect break given by thai line break iterator. Expected: %d Got: %d", expectedLineResult[index-1], p); 2230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete b; 2234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// UBreakIteratorType UBRK_WORD, Locale "en_US_POSIX" 2237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Words don't include colon or period (cldrbug #1969). 2238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const char posxWordText[] = "Can't have breaks in xx:yy or struct.field for CS-types."; 2239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const int32_t posxWordTOffsets[] = { 5, 6, 10, 11, 17, 18, 20, 21, 23, 24, 26, 27, 29, 30, 36, 37, 42, 43, 46, 47, 49, 50, 55, 56 }; 2240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const int32_t posxWordROffsets[] = { 5, 6, 10, 11, 17, 18, 20, 21, 26, 27, 29, 30, 42, 43, 46, 47, 49, 50, 55, 56 }; 2241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// UBreakIteratorType UBRK_WORD, Locale "ja" 2243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Don't break in runs of hiragana or runs of ideograph, where the latter includes \u3005 \u3007 \u303B (cldrbug #2009). 
// Test text is kept in escaped form ("\\uXXXX" sequences, six chars per code unit).
static const char    jaWordText[]     = "\\u79C1\\u9054\\u306B\\u4E00\\u3007\\u3007\\u3007\\u306E\\u30B3\\u30F3\\u30D4\\u30E5\\u30FC\\u30BF"
                                        "\\u304C\\u3042\\u308B\\u3002\\u5948\\u3005\\u306F\\u30EF\\u30FC\\u30C9\\u3067\\u3042\\u308B\\u3002";
#if 0
// Disabled alternative offset tables, kept for reference.
static const int32_t jaWordTOffsets[] = { 2, 3, 7, 8, 14, 17, 18, 20, 21, 24, 27, 28 };
static const int32_t jaWordROffsets[] = { 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28 };
#endif
// There's no separate Japanese word break iterator. Root is the same as Japanese.
// Our dictionary-based iterator has to be tweaked to better handle U+3005,
// U+3007, U+303B and some other cases.
static const int32_t jaWordTOffsets[] = { 1, 2, 3, 4, 5, 7, 8, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 25, 27, 28 };
static const int32_t jaWordROffsets[] = { 1, 2, 3, 4, 5, 7, 8, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 25, 27, 28 };

// UBreakIteratorType UBRK_SENTENCE, Locale "el"
// Add break after Greek question mark (cldrbug #2069).
2258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const char elSentText[] = "\\u0391\\u03B2, \\u03B3\\u03B4; \\u0395 \\u03B6\\u03B7\\u037E \\u0398 \\u03B9\\u03BA. " 2259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u039B\\u03BC \\u03BD\\u03BE! \\u039F\\u03C0, \\u03A1\\u03C2? \\u03A3"; 2260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const int32_t elSentTOffsets[] = { 8, 14, 20, 27, 35, 36 }; 2261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const int32_t elSentROffsets[] = { 20, 27, 35, 36 }; 2262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// UBreakIteratorType UBRK_CHARACTER, Locale "th" 2264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Clusters should not include spacing Thai/Lao vowels (prefix or postfix), except for [SARA] AM (cldrbug #2161). 2265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const char thCharText[] = "\\u0E01\\u0E23\\u0E30\\u0E17\\u0E48\\u0E2D\\u0E21\\u0E23\\u0E08\\u0E19\\u0E32 " 2266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "(\\u0E2A\\u0E38\\u0E0A\\u0E32\\u0E15\\u0E34-\\u0E08\\u0E38\\u0E11\\u0E32\\u0E21\\u0E32\\u0E28) " 2267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0E40\\u0E14\\u0E47\\u0E01\\u0E21\\u0E35\\u0E1B\\u0E31\\u0E0D\\u0E2B\\u0E32 "; 2268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const int32_t thCharTOffsets[] = { 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 2269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 12, 13, 15, 16, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28, 2270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 29, 30, 32, 33, 35, 37, 38, 39, 40, 41 }; 2271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const int32_t thCharROffsets[] = { 1, 3, 5, 6, 7, 8, 9, 11, 2272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) 12, 13, 15, 17, 19, 20, 22, 24, 26, 27, 28, 2273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 29, 32, 33, 35, 37, 38, 40, 41 }; 2274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct { 2276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBreakIteratorType type; 2277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char * locale; 2278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char * escapedText; 2279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const int32_t * tailoredOffsets; 2280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t tailoredOffsetsCount; 2281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const int32_t * rootOffsets; 2282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t rootOffsetsCount; 2283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} TailoredBreakItem; 2284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define ARRAY_PTR_LEN(array) (array),(sizeof(array)/sizeof(array[0])) 2286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const TailoredBreakItem tbItems[] = { 2288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { UBRK_WORD, "en_US_POSIX", posxWordText, ARRAY_PTR_LEN(posxWordTOffsets), ARRAY_PTR_LEN(posxWordROffsets) }, 2289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { UBRK_WORD, "ja", jaWordText, ARRAY_PTR_LEN(jaWordTOffsets), ARRAY_PTR_LEN(jaWordROffsets) }, 2290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { UBRK_SENTENCE, "el", elSentText, ARRAY_PTR_LEN(elSentTOffsets), ARRAY_PTR_LEN(elSentROffsets) }, 2291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { UBRK_CHARACTER, 
"th", thCharText, ARRAY_PTR_LEN(thCharTOffsets), ARRAY_PTR_LEN(thCharROffsets) }, 2292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { UBRK_CHARACTER, NULL, NULL, NULL,0, NULL,0 } // terminator 2293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 2294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void formatOffsets(char* buffer, int32_t buflen, int32_t count, const int32_t* offsets) { 2296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (count-- > 0) { 2297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int writeCount; 2298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sprintf(buffer, /* buflen, */ " %d%n", *offsets++, &writeCount); /* wants to be snprintf */ 2299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buffer += writeCount; 2300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buflen -= writeCount; 2301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum { kMaxOffsetCount = 128 }; 2305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TBTest(BreakIterator* brkitr, int type, const char *locale, const char* escapedText, const int32_t *expectOffsets, int32_t expectOffsetsCount) { 2307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) brkitr->setText( CharsToUnicodeString(escapedText) ); 2308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t foundOffsets[kMaxOffsetCount]; 2309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t offset, foundOffsetsCount = 0; 2310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // do 
forwards iteration test 2311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while ( foundOffsetsCount < kMaxOffsetCount && (offset = brkitr->next()) != BreakIterator::DONE ) { 2312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) foundOffsets[foundOffsetsCount++] = offset; 2313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( foundOffsetsCount != expectOffsetsCount || memcmp(expectOffsets, foundOffsets, foundOffsetsCount*sizeof(foundOffsets[0])) != 0 ) { 2315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // log error for forwards test 2316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char formatExpect[512], formatFound[512]; 2317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) formatOffsets(formatExpect, sizeof(formatExpect), expectOffsetsCount, expectOffsets); 2318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) formatOffsets(formatFound, sizeof(formatFound), foundOffsetsCount, foundOffsets); 2319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("For type %d %-5s, text \"%.16s\"...; expect %d offsets:%s; found %d offsets fwd:%s\n", 2320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) type, locale, escapedText, expectOffsetsCount, formatExpect, foundOffsetsCount, formatFound); 2321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 2322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // do backwards iteration test 2323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --foundOffsetsCount; // back off one from the end offset 2324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while ( foundOffsetsCount > 0 ) { 2325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offset = brkitr->previous(); 2326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( offset != foundOffsets[--foundOffsetsCount] 
) { 2327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // log error for backwards test 2328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char formatExpect[512]; 2329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) formatOffsets(formatExpect, sizeof(formatExpect), expectOffsetsCount, expectOffsets); 2330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("For type %d %-5s, text \"%.16s\"...; expect %d offsets:%s; found rev offset %d where expect %d\n", 2331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) type, locale, escapedText, expectOffsetsCount, formatExpect, offset, foundOffsets[foundOffsetsCount]); 2332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestTailoredBreaks() { 2339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const TailoredBreakItem * tbItemPtr; 2340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale rootLocale = Locale("root"); 2341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (tbItemPtr = tbItems; tbItemPtr->escapedText != NULL; ++tbItemPtr) { 2342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale testLocale = Locale(tbItemPtr->locale); 2343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator * tailoredBrkiter = NULL; 2344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator * rootBrkiter = NULL; 2345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 
2346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch (tbItemPtr->type) { 2347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case UBRK_CHARACTER: 2348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tailoredBrkiter = BreakIterator::createCharacterInstance(testLocale, status); 2349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rootBrkiter = BreakIterator::createCharacterInstance(rootLocale, status); 2350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case UBRK_WORD: 2352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tailoredBrkiter = BreakIterator::createWordInstance(testLocale, status); 2353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rootBrkiter = BreakIterator::createWordInstance(rootLocale, status); 2354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case UBRK_LINE: 2356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tailoredBrkiter = BreakIterator::createLineInstance(testLocale, status); 2357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rootBrkiter = BreakIterator::createLineInstance(rootLocale, status); 2358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case UBRK_SENTENCE: 2360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tailoredBrkiter = BreakIterator::createSentenceInstance(testLocale, status); 2361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rootBrkiter = BreakIterator::createSentenceInstance(rootLocale, status); 2362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: 2364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = 
U_UNSUPPORTED_ERROR; 2365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 2368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "BreakIterator create failed for type %d, locales root or %s - Error: %s", (int)(tbItemPtr->type), tbItemPtr->locale, u_errorName(status)); 2369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 2370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TBTest(tailoredBrkiter, (int)(tbItemPtr->type), tbItemPtr->locale, tbItemPtr->escapedText, tbItemPtr->tailoredOffsets, tbItemPtr->tailoredOffsetsCount); 2372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TBTest(rootBrkiter, (int)(tbItemPtr->type), "root", tbItemPtr->escapedText, tbItemPtr->rootOffsets, tbItemPtr->rootOffsetsCount); 2373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete rootBrkiter; 2375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete tailoredBrkiter; 2376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 2381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TestDictRules create a break iterator from source rules that includes a 2383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// dictionary range. 
Regression for bug #7130. Source rules 2384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// do not declare a break iterator type (word, line, sentence, etc. 2385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// but the dictionary code, without a type, would loop. 2386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 2388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestDictRules() { 2389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *rules = "$dictionary = [a-z]; \n" 2390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "!!forward; \n" 2391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "$dictionary $dictionary; \n" 2392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "!!reverse; \n" 2393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "$dictionary $dictionary; \n"; 2394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *text = "aa"; 2395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 2396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError parseError; 2397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator bi(rules, parseError, status); 2399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 2400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString utext = text; 2401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi.setText(utext); 2402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t position; 2403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t loops; 
2404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (loops = 0; loops<10; loops++) { 2405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) position = bi.next(); 2406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (position == RuleBasedBreakIterator::DONE) { 2407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT(loops == 1); 2411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 2412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataerrln("Error creating RuleBasedBreakIterator: %s", u_errorName(status)); 2413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 2419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// ReadAndConvertFile Read a text data file, convert it to UChars, and 2421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// return the datain one big UChar * buffer, which the caller must delete. 2422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// parameters: 2424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// fileName: the name of the file, with no directory part. 
The test data directory 2425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// is assumed. 2426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// ulen an out parameter, receives the actual length (in UChars) of the file data. 2427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// encoding The file encoding. If the file contains a BOM, that will override the encoding 2428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// specified here. The BOM, if it exists, will be stripped from the returned data. 2429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Pass NULL for the system default encoding. 2430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// status 2431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// returns: 2432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The file data, converted to UChar. 2433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The caller must delete this when done with 2434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// delete [] theBuffer; 2435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TODO: This is a clone of RegexTest::ReadAndConvertFile. 2437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Move this function to some common place. 
2438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------- 2440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar *RBBITest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) { 2441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *retPtr = NULL; 2442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *fileBuf = NULL; 2443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UConverter* conv = NULL; 2444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) FILE *f = NULL; 2445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ulen = 0; 2447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 2448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return retPtr; 2449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Open the file. 
2453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) f = fopen(fileName, "rb"); 2455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (f == 0) { 2456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataerrln("Error opening test data file %s\n", fileName); 2457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_FILE_ACCESS_ERROR; 2458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 2459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Read it in 2462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int fileSize; 2464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int amt_read; 2465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fseek( f, 0, SEEK_END); 2467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileSize = ftell(f); 2468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileBuf = new char[fileSize]; 2469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fseek(f, 0, SEEK_SET); 2470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) amt_read = fread(fileBuf, 1, fileSize, f); 2471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (amt_read != fileSize || fileSize <= 0) { 2472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Error reading test data file."); 2473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanUpAndReturn; 2474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
2476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Look for a Unicode Signature (BOM) on the data just read 2478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t signatureLength; 2480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char * fileBufC; 2481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char* bomEncoding; 2482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileBufC = fileBuf; 2484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bomEncoding = ucnv_detectUnicodeSignature( 2485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileBuf, fileSize, &signatureLength, &status); 2486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(bomEncoding!=NULL ){ 2487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileBufC += signatureLength; 2488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileSize -= signatureLength; 2489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) encoding = bomEncoding; 2490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Open a converter to take the rule file to UTF-16 2494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) conv = ucnv_open(encoding, &status); 2496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 2497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanUpAndReturn; 
2498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Convert the rules to UChar. 2502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Preflight first to determine required buffer size. 2503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ulen = ucnv_toUChars(conv, 2505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, // dest, 2506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0, // destCapacity, 2507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileBufC, 2508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileSize, 2509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &status); 2510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (status == U_BUFFER_OVERFLOW_ERROR) { 2511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Buffer Overflow is expected from the preflight operation. 
2512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_ZERO_ERROR; 2513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) retPtr = new UChar[ulen+1]; 2515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_toUChars(conv, 2516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) retPtr, // dest, 2517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ulen+1, 2518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileBufC, 2519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileSize, 2520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &status); 2521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)cleanUpAndReturn: 2524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fclose(f); 2525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete []fileBuf; 2526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_close(conv); 2527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 2528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); 2529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete retPtr; 2530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) retPtr = 0; 2531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ulen = 0; 2532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 2533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return retPtr; 2534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
2537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------------------- 2539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Run tests from each of the boundary test data files distributed by the Unicode Consortium 2541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------- 2543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestUnicodeFiles() { 2544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator *bi; 2545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 2546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi = (RuleBasedBreakIterator *)BreakIterator::createCharacterInstance(Locale::getEnglish(), status); 2548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 2549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 2550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) runUnicodeTestData("GraphemeBreakTest.txt", bi); 2551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 2553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); 2555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 
2556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 2557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) runUnicodeTestData("WordBreakTest.txt", bi); 2558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 2560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi = (RuleBasedBreakIterator *)BreakIterator::createSentenceInstance(Locale::getEnglish(), status); 2562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 2563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 2564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) runUnicodeTestData("SentenceBreakTest.txt", bi); 2565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 2567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); 2569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 2570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 2571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) runUnicodeTestData("LineBreakTest.txt", bi); 2572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 2574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles)//-------------------------------------------------------------------------------------------- 2578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Run tests from one of the boundary test data files distributed by the Unicode Consortium 2580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------- 2582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi) { 2583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS 2584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TODO(andy): Match line break behavior to Unicode 6.0 and remove this time bomb. 2585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVersionInfo icu4601 = { 4, 6, 0, 1 }; 2586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool isICUVersionPast46 = isICUVersionAtLeast(icu4601); 2587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool isLineBreak = 0 == strcmp(fileName, "LineBreakTest.txt"); 2588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 2589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Open and read the test data file, put it into a UnicodeString. 
2592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *testDataDirectory = IntlTest::getSourceTestData(status); 2594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char testFileName[1000]; 2595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) { 2596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataerrln("Can't open test data. Path too long."); 2597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 2598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) strcpy(testFileName, testDataDirectory); 2600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) strcat(testFileName, fileName); 2601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Opening data file %s\n", fileName); 2603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int len; 2605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status); 2606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (status != U_FILE_ACCESS_ERROR) { 2607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 2608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT(testFile != NULL); 2609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status) || testFile == NULL) { 2611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; /* something went wrong, error already output */ 
2612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString testFileAsString(TRUE, testFile, len); 2614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Parse the test data file using a regular expression. 2617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Each kind of token is recognized in its own capture group; what type of item was scanned 2618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // is identified by which group had a match. 2619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Caputure Group # 1 2 3 4 5 2621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Parses this item: divide x hex digits comment \n unrecognized \n 2622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString tokenExpr("[ \t]*(?:(\\u00F7)|(\\u00D7)|([0-9a-fA-F]+)|((?:#.*?)?$.)|(.*?$.))", -1, US_INV); 2624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher tokenMatcher(tokenExpr, testFileAsString, UREGEX_MULTILINE | UREGEX_DOTALL, status); 2625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString testString; 2626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector32 breakPositions(status); 2627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int lineNumber = 1; 2628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 2629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 2630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 
2631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Scan through each test case, building up the string to be broken in testString, 2635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // and the positions that should be boundaries in the breakPositions vector. 2636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int spin = 0; 2638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tokenMatcher.find()) { 2639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(tokenMatcher.hitEnd()) { 2640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Shouldnt Happen(TM). This means we didn't find the symbols we were looking for. 2641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) This occurred when the text file was corrupt (wasn't marked as UTF-8) 2642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) and caused an infinite loop here on EBCDIC systems! 2643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 2644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr,"FAIL: hit end of file %s for the %8dth time- corrupt data file?\r", fileName, ++spin); 2645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // return; 2646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (tokenMatcher.start(1, status) >= 0) { 2648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Scanned a divide sign, indicating a break position in the test data. 
2649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testString.length()>0) { 2650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPositions.addElement(testString.length(), status); 2651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else if (tokenMatcher.start(2, status) >= 0) { 2654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Scanned an 'x', meaning no break at this position in the test data 2655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Nothing to be done here. 2656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else if (tokenMatcher.start(3, status) >= 0) { 2658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Scanned Hex digits. Convert them to binary, append to the character data string. 
2659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString &hexNumber = tokenMatcher.group(3, status); 2660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int length = hexNumber.length(); 2661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (length<=8) { 2662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char buf[10]; 2663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) hexNumber.extract (0, length, buf, sizeof(buf), US_INV); 2664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c = (UChar32)strtol(buf, NULL, 16); 2665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c<=0x10ffff) { 2666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testString.append(c); 2667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 2668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Error: Unicode Character value out of range. \'%s\', line %d.\n", 2669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileName, lineNumber); 2670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 2672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Syntax Error: Hex Unicode Character value must have no more than 8 digits at \'%s\', line %d.\n", 2673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileName, lineNumber); 2674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else if (tokenMatcher.start(4, status) >= 0) { 2677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Scanned to end of a line, possibly skipping over a comment in the process. 
2678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If the line from the file contained test data, run the test now. 2679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (testString.length() > 0) { 2681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TODO(andy): Remove this time bomb code. 2682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)if (!isLineBreak || isICUVersionPast46 || !(4658 <= lineNumber && lineNumber <= 4758)) { 2683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) checkUnicodeTestCase(fileName, lineNumber, testString, &breakPositions, bi); 2684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Clear out this test case. 2688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The string and breakPositions vector will be refilled as the next 2689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // test case is parsed. 2690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testString.remove(); 2691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPositions.removeAllElements(); 2692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lineNumber++; 2693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 2694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Scanner catchall. Something unrecognized appeared on the line. 
2695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char token[16]; 2696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString uToken = tokenMatcher.group(0, status); 2697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uToken.extract(0, uToken.length(), token, (uint32_t)sizeof(token)); 2698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) token[sizeof(token)-1] = 0; 2699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Syntax error in test data file \'%s\', line %d. Scanning \"%s\"\n", fileName, lineNumber, token); 2700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Clean up, in preparation for continuing with the next line. 2702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testString.remove(); 2703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPositions.removeAllElements(); 2704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lineNumber++; 2705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 2707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 2708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete [] testFile; 2713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 2714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles)//-------------------------------------------------------------------------------------------- 2717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// checkUnicodeTestCase() Run one test case from one of the Unicode Consortium 2719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// test data files. Do only a simple, forward-only check - 2720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// this test is mostly to check that ICU and the Unicode 2721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// data agree with each other. 2722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------------------- 2724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::checkUnicodeTestCase(const char *testFileName, int lineNumber, 2725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString &testString, // Text data to be broken 2726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector32 *breakPositions, // Positions where breaks should be found. 2727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator *bi) { 2728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t pos; // Break Position in the test string 2729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t expectedI = 0; // Index of expected break position in the vector of expected results. 
2730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t expectedPos; // Expected break position (index into test string) 2731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi->setText(testString); 2733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = bi->first(); 2734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = bi->next(); 2735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (pos != BreakIterator::DONE) { 2737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expectedI >= breakPositions->size()) { 2738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Test file \"%s\", line %d, unexpected break found at position %d", 2739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testFileName, lineNumber, pos); 2740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectedPos = breakPositions->elementAti(expectedI); 2743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pos < expectedPos) { 2744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Test file \"%s\", line %d, unexpected break found at position %d", 2745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testFileName, lineNumber, pos); 2746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pos > expectedPos) { 2749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Test file \"%s\", line %d, failed to find expected break at position %d", 2750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) testFileName, lineNumber, expectedPos); 2751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = bi->next(); 2754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectedI++; 2755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pos==BreakIterator::DONE && expectedI<breakPositions->size()) { 2758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Test file \"%s\", line %d, failed to find expected break at position %d", 2759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testFileName, lineNumber, breakPositions->elementAti(expectedI)); 2760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS 2766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------------- 2767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// classs RBBIMonkeyKind 2769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Monkey Test for Break Iteration 2771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Abstract interface class. 
Concrete derived classes independently 2772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// implement the break rules for different iterator types. 2773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The Monkey Test itself uses doesn't know which type of break iterator it is 2775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// testing, but works purely in terms of the interface defined here. 2776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------------- 2778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class RBBIMonkeyKind { 2779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 2780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Return a UVector of UnicodeSets, representing the character classes used 2781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // for this type of iterator. 2782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual UVector *charClasses() = 0; 2783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Set the test text on which subsequent calls to next() will operate 2785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual void setText(const UnicodeString &s) = 0; 2786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Find the next break postion, starting from the prev break position, or from zero. 2788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Return -1 after reaching end of string. 
2789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual int32_t next(int32_t i) = 0; 2790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~RBBIMonkeyKind(); 2792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode deferredStatus; 2793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)protected: 2796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIMonkeyKind(); 2797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private: 2799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 2800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBIMonkeyKind::RBBIMonkeyKind() { 2802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) deferredStatus = U_ZERO_ERROR; 2803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBIMonkeyKind::~RBBIMonkeyKind() { 2806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------------------- 2810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Random Numbers. 
//                     Similar to standard lib rand() and srand()
//                     Not using library to
//                       1.  Get same results on all platforms.
//                       2.  Get access to current seed, to more easily reproduce failures.
//
//---------------------------------------------------------------------------------------

// Current generator state.  Kept at file scope so that it can be read back
// or overwritten to replay a particular sequence.
static uint32_t m_seed = 1;

// Advance the linear congruential generator by one step and return a
// pseudo-random value in the range 0 .. 32767, taken from bits 16..30
// of the updated state.
static uint32_t m_rand()
{
    const uint32_t kMultiplier = 1103515245;
    const uint32_t kIncrement  = 12345;

    // Unsigned 32 bit arithmetic; wrap-around on overflow is intended.
    m_seed = m_seed * kMultiplier + kIncrement;

    // Same result as (m_seed / 65536) % 32768 for an unsigned value.
    return (m_seed >> 16) & 0x7fff;
}


//------------------------------------------------------------------------------------------
//
//   class RBBICharMonkey      Character (Grapheme Cluster) specific implementation
//                             of RBBIMonkeyKind.
2830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------ 2832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class RBBICharMonkey: public RBBIMonkeyKind { 2833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 2834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBICharMonkey(); 2835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~RBBICharMonkey(); 2836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual UVector *charClasses(); 2837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual void setText(const UnicodeString &s); 2838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual int32_t next(int32_t i); 2839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private: 2840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector *fSets; 2841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fCRLFSet; 2843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fControlSet; 2844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fExtendSet; 2845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fPrependSet; 2846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fSpacingSet; 2847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fLSet; 2848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fVSet; 2849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fTSet; 2850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fLVSet; 2851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fLVTSet; 
2852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fHangulSet; 2853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fAnySet; 2854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *fText; 2856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 2857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBICharMonkey::RBBICharMonkey() { 2860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 2861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fText = NULL; 2863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCRLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\r\\n]"), status); 2865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fControlSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Control}]"), status); 2866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Extend}]"), status); 2867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPrependSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Prepend}]"), status); 2868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSpacingSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = SpacingMark}]"), status); 2869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = L}]"), status); 2870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fVSet = 
new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = V}]"), status); 2871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fTSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = T}]"), status); 2872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLVSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LV}]"), status); 2873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLVTSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LVT}]"), status); 2874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fHangulSet = new UnicodeSet(); 2875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fHangulSet->addAll(*fLSet); 2876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fHangulSet->addAll(*fVSet); 2877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fHangulSet->addAll(*fTSet); 2878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fHangulSet->addAll(*fLVSet); 2879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fHangulSet->addAll(*fLVTSet); 2880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAnySet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u0000-\\U0010ffff]"), status); 2881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets = new UVector(status); 2883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fCRLFSet, status); 2884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fControlSet, status); 2885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fExtendSet, status); 2886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fPrependSet, status); 2887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fSpacingSet, status); 
2888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fHangulSet, status); 2889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fAnySet, status); 2890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 2891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) deferredStatus = status; 2892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBICharMonkey::setText(const UnicodeString &s) { 2897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fText = &s; 2898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RBBICharMonkey::next(int32_t prevPos) { 2903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int p0, p1, p2, p3; // Indices of the significant code points around the 2904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // break position being tested. The candidate break 2905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // location is before p2. 2906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int breakPos = -1; 2908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c0, c1, c2, c3; // The code points at p0, p1, p2 & p3. 
2910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(deferredStatus)) { 2912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 2913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Previous break at end of string. return DONE. 2916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (prevPos >= fText->length()) { 2917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 2918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p0 = p1 = p2 = p3 = prevPos; 2920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c3 = fText->char32At(prevPos); 2921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c0 = c1 = c2 = 0; 2922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Loop runs once per "significant" character position in the input text. 2924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 2925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Move all of the positions forward in the input string. 
2926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p0 = p1; c0 = c1; 2927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p1 = p2; c1 = c2; 2928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p2 = p3; c2 = c3; 2929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Advancd p3 by one codepoint 2931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p3 = fText->moveIndex32(p3, 1); 2932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c3 = fText->char32At(p3); 2933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p1 == p2) { 2935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Still warming up the loop. (won't work with zero length strings, but we don't care) 2936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 2937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p2 == fText->length()) { 2939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Reached end of string. Always a break position. 2940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule GB3 CR x LF 2944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No Extend or Format characters may appear between the CR and LF, 2945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // which requires the additional check for p2 immediately following p1. 
2946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c1==0x0D && c2==0x0A && p1==(p2-1)) { 2948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 2949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (GB4). ( Control | CR | LF ) <break> 2952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fControlSet->contains(c1) || 2953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c1 == 0x0D || 2954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c1 == 0x0A) { 2955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (GB5) <break> ( Control | CR | LF ) 2959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 2960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fControlSet->contains(c2) || 2961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c2 == 0x0D || 2962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c2 == 0x0A) { 2963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 2964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (GB6) L x ( L | V | LV | LVT ) 2968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fLSet->contains(c1) && 2969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fLSet->contains(c2) || 
2970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fVSet->contains(c2) || 2971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLVSet->contains(c2) || 2972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLVTSet->contains(c2))) { 2973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 2974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (GB7) ( LV | V ) x ( V | T ) 2977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fLVSet->contains(c1) || fVSet->contains(c1)) && 2978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fVSet->contains(c2) || fTSet->contains(c2))) { 2979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 2980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (GB8) ( LVT | T) x T 2983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fLVTSet->contains(c1) || fTSet->contains(c1)) && 2984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fTSet->contains(c2)) { 2985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 2986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (GB9) Numeric x ALetter 2989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fExtendSet->contains(c2)) { 2990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 2991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) // Rule (GB9a) x SpacingMark 2994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSpacingSet->contains(c2)) { 2995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 2996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (GB9b) Prepend x 2999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fPrependSet->contains(c1)) { 3000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (GB10) Any <break> Any 3004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPos = p2; 3008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return breakPos; 3009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UVector *RBBICharMonkey::charClasses() { 3014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fSets; 3015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBICharMonkey::~RBBICharMonkey() { 
3019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSets; 3020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCRLFSet; 3021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fControlSet; 3022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fExtendSet; 3023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fPrependSet; 3024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSpacingSet; 3025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fLSet; 3026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fVSet; 3027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fTSet; 3028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fLVSet; 3029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fLVTSet; 3030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fHangulSet; 3031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fAnySet; 3032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------ 3035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 3036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// class RBBIWordMonkey Word Break specific implementation 3037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// of RBBIMonkeyKind. 
3038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 3039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------ 3040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class RBBIWordMonkey: public RBBIMonkeyKind { 3041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 3042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIWordMonkey(); 3043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~RBBIWordMonkey(); 3044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual UVector *charClasses(); 3045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual void setText(const UnicodeString &s); 3046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual int32_t next(int32_t i); 3047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private: 3048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector *fSets; 3049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fCRSet; 3051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fLFSet; 3052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fNewlineSet; 3053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fKatakanaSet; 3054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fALetterSet; 3055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO(jungshik): Do we still need this change? 
3056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // UnicodeSet *fALetterSet; // matches ALetterPlus in word.txt 3057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fMidNumLetSet; 3058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fMidLetterSet; 3059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fMidNumSet; 3060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fNumericSet; 3061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fFormatSet; 3062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fOtherSet; 3063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fExtendSet; 3064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fExtendNumLetSet; 3065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fDictionaryCjkSet; 3066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher *fMatcher; 3068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *fText; 3070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 3071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBIWordMonkey::RBBIWordMonkey() 3074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 3075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 3076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets = new UVector(status); 3078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
3079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCRSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"), status); 3080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"), status); 3081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNewlineSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"), status); 3082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDictionaryCjkSet= new UnicodeSet("[[\\uac00-\\ud7a3][:Han:][:Hiragana:]]", status); 3083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Exclude Hangul syllables from ALetterSet during testing. 3084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Leave CJK dictionary characters out from the monkey tests! 3085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 3086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}" 3087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "[\\p{Line_Break = Complex_Context}" 3088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "-\\p{Grapheme_Cluster_Break = Extend}" 3089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "-\\p{Grapheme_Cluster_Break = Control}" 3090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "]]", 3091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status); 3092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 3093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fALetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ALetter}]"), status); 3094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fALetterSet->removeAll(*fDictionaryCjkSet); 3095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fKatakanaSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = 
Katakana}]"), status); 3096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMidNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNumLet}]"), status); 3097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMidLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidLetter}]"), status); 3098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMidNumSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNum}]"), status); 3099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Numeric}[\\uff10-\\uff19]]"), status); 3100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Format}]"), status); 3101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fExtendNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ExtendNumLet}]"), status); 3102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"), status); 3103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet = new UnicodeSet(); 3105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(status)) { 3106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) deferredStatus = status; 3107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 3108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->complement(); 3111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fCRSet); 3112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fLFSet); 
3113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fNewlineSet); 3114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fKatakanaSet); 3115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fALetterSet); 3116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fMidLetterSet); 3117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fMidNumSet); 3118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fNumericSet); 3119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fExtendNumLetSet); 3120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fFormatSet); 3121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fExtendSet); 3122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Inhibit dictionary characters from being tested at all. 
3123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fDictionaryCjkSet); 3124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{LineBreak = Complex_Context}]"), status)); 3125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fCRSet, status); 3127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fLFSet, status); 3128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fNewlineSet, status); 3129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fALetterSet, status); 3130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //fSets->addElement(fKatakanaSet, status); //TODO: work out how to test katakana 3131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fMidLetterSet, status); 3132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fMidNumLetSet, status); 3133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fMidNumSet, status); 3134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fNumericSet, status); 3135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fFormatSet, status); 3136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fExtendSet, status); 3137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fOtherSet, status); 3138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fExtendNumLetSet, status); 3139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 3141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) deferredStatus = 
status; 3142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBIWordMonkey::setText(const UnicodeString &s) { 3146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fText = &s; 3147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RBBIWordMonkey::next(int32_t prevPos) { 3151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int p0, p1, p2, p3; // Indices of the significant code points around the 3152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // break position being tested. The candidate break 3153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // location is before p2. 3154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int breakPos = -1; 3156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c0, c1, c2, c3; // The code points at p0, p1, p2 & p3. 3158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(deferredStatus)) { 3160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 3161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Prev break at end of string. return DONE. 
3164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (prevPos >= fText->length()) { 3165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 3166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p0 = p1 = p2 = p3 = prevPos; 3168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c3 = fText->char32At(prevPos); 3169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c0 = c1 = c2 = 0; 3170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Loop runs once per "significant" character position in the input text. 3172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 3173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Move all of the positions forward in the input string. 3174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p0 = p1; c0 = c1; 3175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p1 = p2; c1 = c2; 3176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p2 = p3; c2 = c3; 3177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Advancd p3 by X(Extend | Format)* Rule 4 3179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // But do not advance over Extend & Format following a new line. 
(Unicode 5.1 change) 3180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 3181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p3 = fText->moveIndex32(p3, 1); 3182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c3 = fText->char32At(p3); 3183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCRSet->contains(c2) || fLFSet->contains(c2) || fNewlineSet->contains(c2)) { 3184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 3186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fFormatSet->contains(c3) || fExtendSet->contains(c3)); 3188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p1 == p2) { 3191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Still warming up the loop. (won't work with zero length strings, but we don't care) 3192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p2 == fText->length()) { 3195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Reached end of string. Always a break position. 
3196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (3) CR x LF 3200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No Extend or Format characters may appear between the CR and LF, 3201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // which requires the additional check for p2 immediately following p1. 3202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 3203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c1==0x0D && c2==0x0A) { 3204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (3a) Break before and after newlines (including CR and LF) 3208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 3209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCRSet->contains(c1) || fLFSet->contains(c1) || fNewlineSet->contains(c1)) { 3210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 3212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCRSet->contains(c2) || fLFSet->contains(c2) || fNewlineSet->contains(c2)) { 3213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 3215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (5). 
ALetter x ALetter 3217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fALetterSet->contains(c1) && 3218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fALetterSet->contains(c2)) { 3219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (6) ALetter x (MidLetter | MidNumLet) ALetter 3223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 3224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( fALetterSet->contains(c1) && 3225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fMidLetterSet->contains(c2) || fMidNumLetSet->contains(c2)) && 3226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fALetterSet->contains(c3)) { 3227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (7) ALetter (MidLetter | MidNumLet) x ALetter 3232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fALetterSet->contains(c0) && 3233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fMidLetterSet->contains(c1) || fMidNumLetSet->contains(c1)) && 3234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fALetterSet->contains(c2)) { 3235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (8) Numeric x Numeric 
3239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fNumericSet->contains(c1) && 3240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNumericSet->contains(c2)) { 3241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (9) ALetter x Numeric 3245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fALetterSet->contains(c1) && 3246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNumericSet->contains(c2)) { 3247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (10) Numeric x ALetter 3251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fNumericSet->contains(c1) && 3252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fALetterSet->contains(c2)) { 3253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (11) Numeric (MidNum | MidNumLet) x Numeric 3257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fNumericSet->contains(c0) && 3258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fMidNumSet->contains(c1) || fMidNumLetSet->contains(c1)) && 3259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNumericSet->contains(c2)) { 3260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
3262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (12) Numeric x (MidNum | MidNumLet) Numeric 3264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fNumericSet->contains(c1) && 3265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fMidNumSet->contains(c2) || fMidNumLetSet->contains(c2)) && 3266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNumericSet->contains(c3)) { 3267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (13) Katakana x Katakana 3271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fKatakanaSet->contains(c1) && 3272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fKatakanaSet->contains(c2)) { 3273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule 13a 3277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fALetterSet->contains(c1) || fNumericSet->contains(c1) || 3278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fKatakanaSet->contains(c1) || fExtendNumLetSet->contains(c1)) && 3279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fExtendNumLetSet->contains(c2)) { 3280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule 13b 3284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) if (fExtendNumLetSet->contains(c1) && 3285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fALetterSet->contains(c2) || fNumericSet->contains(c2) || 3286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fKatakanaSet->contains(c2))) { 3287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule 14. Break found here. 3291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPos = p2; 3295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return breakPos; 3296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UVector *RBBIWordMonkey::charClasses() { 3300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fSets; 3301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBIWordMonkey::~RBBIWordMonkey() { 3305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSets; 3306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCRSet; 3307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fLFSet; 3308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fNewlineSet; 
3309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fKatakanaSet; 3310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fALetterSet; 3311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fMidNumLetSet; 3312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fMidLetterSet; 3313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fMidNumSet; 3314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fNumericSet; 3315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fFormatSet; 3316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fExtendSet; 3317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fExtendNumLetSet; 3318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fOtherSet; 3319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------ 3325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 3326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// class RBBISentMonkey Sentence Break specific implementation 3327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// of RBBIMonkeyKind. 
3328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 3329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------ 3330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class RBBISentMonkey: public RBBIMonkeyKind { 3331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 3332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBISentMonkey(); 3333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~RBBISentMonkey(); 3334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual UVector *charClasses(); 3335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual void setText(const UnicodeString &s); 3336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual int32_t next(int32_t i); 3337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private: 3338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int moveBack(int posFrom); 3339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int moveForward(int posFrom); 3340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 cAt(int pos); 3341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector *fSets; 3343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fSepSet; 3345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fFormatSet; 3346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fSpSet; 3347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fLowerSet; 3348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fUpperSet; 3349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fOLetterSet; 
3350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fNumericSet; 3351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fATermSet; 3352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fSContinueSet; 3353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fSTermSet; 3354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fCloseSet; 3355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fOtherSet; 3356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fExtendSet; 3357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *fText; 3359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 3361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBISentMonkey::RBBISentMonkey() 3363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 3364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 3365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets = new UVector(status); 3367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Separator Set Note: Beginning with Unicode 5.1, CR and LF were removed from the separator 3369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // set and made into character classes of their own. For the monkey impl, 3370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // they remain in SEP, since Sep always appears with CR and LF in the rules. 
3371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSepSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sep} \\u000a \\u000d]"), status); 3372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Format}]"), status); 3373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSpSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sp}]"), status); 3374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLowerSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Lower}]"), status); 3375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fUpperSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Upper}]"), status); 3376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = OLetter}]"), status); 3377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Numeric}]"), status); 3378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fATermSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = ATerm}]"), status); 3379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSContinueSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = SContinue}]"), status); 3380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSTermSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = STerm}]"), status); 3381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCloseSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Close}]"), status); 3382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Extend}]"), status); 3383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet = 
new UnicodeSet(); 3384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(status)) { 3386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) deferredStatus = status; 3387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 3388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->complement(); 3391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fSepSet); 3392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fFormatSet); 3393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fSpSet); 3394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fLowerSet); 3395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fUpperSet); 3396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fOLetterSet); 3397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fNumericSet); 3398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fATermSet); 3399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fSContinueSet); 3400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fSTermSet); 3401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fCloseSet); 3402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOtherSet->removeAll(*fExtendSet); 3403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fSepSet, status); 3405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) fSets->addElement(fFormatSet, status); 3406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fSpSet, status); 3407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fLowerSet, status); 3408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fUpperSet, status); 3409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fOLetterSet, status); 3410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fNumericSet, status); 3411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fATermSet, status); 3412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fSContinueSet, status); 3413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fSTermSet, status); 3414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fCloseSet, status); 3415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fOtherSet, status); 3416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fExtendSet, status); 3417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 3419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) deferredStatus = status; 3420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBISentMonkey::setText(const UnicodeString &s) { 3426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fText = &s; 
3427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UVector *RBBISentMonkey::charClasses() { 3430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fSets; 3431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// moveBack() Find the "significant" code point preceding the index i. 3435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Skips over ($Extend | $Format)* . 3436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 3437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int RBBISentMonkey::moveBack(int i) { 3438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (i <= 0) { 3439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 3440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 3442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t j = i; 3443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 3444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j = fText->moveIndex32(j, -1); 3445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = fText->char32At(j); 3446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (j>0 &&(fFormatSet->contains(c) || fExtendSet->contains(c))); 3448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return j; 3449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
3451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int RBBISentMonkey::moveForward(int i) { 3454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (i>=fText->length()) { 3455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fText->length(); 3456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 3458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t j = i; 3459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 3460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j = fText->moveIndex32(j, 1); 3461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = cAt(j); 3462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fFormatSet->contains(c) || fExtendSet->contains(c)); 3464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return j; 3465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 RBBISentMonkey::cAt(int pos) { 3468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pos<0 || pos>=fText->length()) { 3469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 3470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 3471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fText->char32At(pos); 3472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles)int32_t RBBISentMonkey::next(int32_t prevPos) { 3476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int p0, p1, p2, p3; // Indices of the significant code points around the 3477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // break position being tested. The candidate break 3478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // location is before p2. 3479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int breakPos = -1; 3481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c0, c1, c2, c3; // The code points at p0, p1, p2 & p3. 3483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 3484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(deferredStatus)) { 3486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 3487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Prev break at end of string. return DONE. 
3490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (prevPos >= fText->length()) { 3491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 3492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p0 = p1 = p2 = p3 = prevPos; 3494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c3 = fText->char32At(prevPos); 3495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c0 = c1 = c2 = 0; 3496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Loop runs once per "significant" character position in the input text. 3498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 3499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Move all of the positions forward in the input string. 3500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p0 = p1; c0 = c1; 3501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p1 = p2; c1 = c2; 3502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p2 = p3; c2 = c3; 3503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Advancd p3 by X(Extend | Format)* Rule 4 3505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p3 = moveForward(p3); 3506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c3 = cAt(p3); 3507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (3) CR x LF 3509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c1==0x0d && c2==0x0a && p2==(p1+1)) { 3510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
3513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (4). Sep <break> 3514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSepSet->contains(c1)) { 3515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p2 = p1+1; // Separators don't combine with Extend or Format. 3516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p2 >= fText->length()) { 3520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Reached end of string. Always a break position. 3521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (p2 == prevPos) { 3525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Still warming up the loop. (won't work with zero length strings, but we don't care) 3526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (6). ATerm x Numeric 3530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fATermSet->contains(c1) && fNumericSet->contains(c2)) { 3531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (7). 
Upper ATerm x Uppper 3535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fUpperSet->contains(c0) && fATermSet->contains(c1) && fUpperSet->contains(c2)) { 3536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (8) ATerm Close* Sp* x (not (OLettter | Upper | Lower | Sep | STerm | ATerm))* Lower 3540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Note: STerm | ATerm are added to the negated part of the expression by a 3541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // note to the Unicode 5.0 documents. 3542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int p8 = p1; 3543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fSpSet->contains(cAt(p8))) { 3544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p8 = moveBack(p8); 3545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fCloseSet->contains(cAt(p8))) { 3547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p8 = moveBack(p8); 3548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fATermSet->contains(cAt(p8))) { 3550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p8=p2; 3551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 3552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = cAt(p8); 3553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c==-1 || fOLetterSet->contains(c) || fUpperSet->contains(c) || 3554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLowerSet->contains(c) || fSepSet->contains(c) || 
3555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fATermSet->contains(c) || fSTermSet->contains(c)) { 3556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p8 = moveForward(p8); 3559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fLowerSet->contains(cAt(p8))) { 3561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule 8a (STerm | ATerm) Close* Sp* x (SContinue | STerm | ATerm); 3566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSContinueSet->contains(c2) || fSTermSet->contains(c2) || fATermSet->contains(c2)) { 3567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p8 = p1; 3568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fSpSet->contains(cAt(p8))) { 3569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p8 = moveBack(p8); 3570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fCloseSet->contains(cAt(p8))) { 3572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p8 = moveBack(p8); 3573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = cAt(p8); 3575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSTermSet->contains(c) || fATermSet->contains(c)) { 3576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) } 3578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (9) (STerm | ATerm) Close* x (Close | Sp | Sep | CR | LF) 3581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int p9 = p1; 3582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fCloseSet->contains(cAt(p9))) { 3583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p9 = moveBack(p9); 3584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = cAt(p9); 3586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fSTermSet->contains(c) || fATermSet->contains(c))) { 3587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCloseSet->contains(c2) || fSpSet->contains(c2) || fSepSet->contains(c2)) { 3588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (10) (Sterm | ATerm) Close* Sp* x (Sp | Sep | CR | LF) 3593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int p10 = p1; 3594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fSpSet->contains(cAt(p10))) { 3595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p10 = moveBack(p10); 3596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fCloseSet->contains(cAt(p10))) { 3598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p10 = moveBack(p10); 3599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
3600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSTermSet->contains(cAt(p10)) || fATermSet->contains(cAt(p10))) { 3601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSpSet->contains(c2) || fSepSet->contains(c2)) { 3602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (11) (STerm | ATerm) Close* Sp* (Sep | CR | LF)? <break> 3607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int p11 = p1; 3608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSepSet->contains(cAt(p11))) { 3609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p11 = moveBack(p11); 3610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fSpSet->contains(cAt(p11))) { 3612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p11 = moveBack(p11); 3613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fCloseSet->contains(cAt(p11))) { 3615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p11 = moveBack(p11); 3616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSTermSet->contains(cAt(p11)) || fATermSet->contains(cAt(p11))) { 3618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule (12) Any x Any 
3622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPos = p2; 3625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return breakPos; 3626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBISentMonkey::~RBBISentMonkey() { 3629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSets; 3630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSepSet; 3631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fFormatSet; 3632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSpSet; 3633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fLowerSet; 3634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fUpperSet; 3635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fOLetterSet; 3636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fNumericSet; 3637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fATermSet; 3638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSContinueSet; 3639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSTermSet; 3640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCloseSet; 3641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fOtherSet; 3642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fExtendSet; 3643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
3647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------- 3648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 3649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// RBBILineMonkey 3650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 3651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------- 3652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class RBBILineMonkey: public RBBIMonkeyKind { 3654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 3655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBILineMonkey(); 3656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual ~RBBILineMonkey(); 3657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual UVector *charClasses(); 3658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual void setText(const UnicodeString &s); 3659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual int32_t next(int32_t i); 3660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) virtual void rule9Adjust(int32_t pos, UChar32 *posChar, int32_t *nextPos, UChar32 *nextChar); 3661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private: 3662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector *fSets; 3663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fBK; 3665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fCR; 3666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fLF; 3667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fCM; 
3668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fNL; 3669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fSG; 3670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fWJ; 3671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fZW; 3672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fGL; 3673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fCB; 3674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fSP; 3675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fB2; 3676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fBA; 3677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fBB; 3678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fHY; 3679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fH2; 3680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fH3; 3681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fCL; 3682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fCP; 3683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fEX; 3684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fIN; 3685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fJL; 3686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fJV; 3687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fJT; 3688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fNS; 3689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fOP; 3690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fQU; 3691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fIS; 
3692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fNU; 3693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fPO; 3694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fPR; 3695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fSY; 3696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fAI; 3697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fAL; 3698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fID; 3699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fSA; 3700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *fXX; 3701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *fCharBI; 3703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString *fText; 3705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *fOrigPositions; 3706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher *fNumberMatcher; 3708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher *fLB11Matcher; 3709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 3710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBILineMonkey::RBBILineMonkey() 3713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 3714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 3715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) fSets = new UVector(status); 3717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBK = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_Break=BK}]"), status); 3719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCR = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CR}]"), status); 3720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLF = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=LF}]"), status); 3721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCM = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CM}]"), status); 3722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NL}]"), status); 3723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fWJ = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=WJ}]"), status); 3724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fZW = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ZW}]"), status); 3725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fGL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=GL}]"), status); 3726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCB = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CB}]"), status); 3727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SP}]"), status); 3728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fB2 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=B2}]"), status); 3729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBA = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BA}]"), status); 3730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBB = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BB}]"), status); 
3731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fHY = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=HY}]"), status); 3732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fH2 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H2}]"), status); 3733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fH3 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H3}]"), status); 3734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CL}]"), status); 3735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CP}]"), status); 3736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fEX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=EX}]"), status); 3737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fIN = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IN}]"), status); 3738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fJL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JL}]"), status); 3739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fJV = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JV}]"), status); 3740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fJT = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JT}]"), status); 3741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNS = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NS}]"), status); 3742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=OP}]"), status); 3743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fQU = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=QU}]"), status); 3744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fIS = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IS}]"), 
status); 3745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNU = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NU}]"), status); 3746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPO = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PO}]"), status); 3747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPR = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PR}]"), status); 3748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSY = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SY}]"), status); 3749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAI = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AI}]"), status); 3750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AL}]"), status); 3751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fID = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ID}]"), status); 3752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSA = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SA}]"), status); 3753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSG = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\ud800-\\udfff]"), status); 3754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fXX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=XX}]"), status); 3755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 3757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) deferredStatus = status; 3758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCharBI = NULL; 3759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNumberMatcher = NULL; 3760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 3761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
3762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAL->addAll(*fXX); // Default behavior for XX is identical to AL 3764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAL->addAll(*fAI); // Default behavior for AI is identical to AL 3765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAL->addAll(*fSA); // Default behavior for SA is XX, which defaults to AL 3766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAL->addAll(*fSG); // Default behavior for SG is identical to AL. 3767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fBK, status); 3769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fCR, status); 3770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fLF, status); 3771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fCM, status); 3772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fNL, status); 3773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fWJ, status); 3774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fZW, status); 3775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fGL, status); 3776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fCB, status); 3777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fSP, status); 3778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fB2, status); 3779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fBA, status); 3780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fBB, status); 3781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) fSets->addElement(fHY, status); 3782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fH2, status); 3783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fH3, status); 3784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fCL, status); 3785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fCP, status); 3786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fEX, status); 3787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fIN, status); 3788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fJL, status); 3789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fJT, status); 3790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fJV, status); 3791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fNS, status); 3792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fOP, status); 3793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fQU, status); 3794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fIS, status); 3795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fNU, status); 3796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fPO, status); 3797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fPR, status); 3798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fSY, status); 3799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fAI, status); 3800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fAL, status); 3801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fID, status); 
3802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fWJ, status); 3803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fSA, status); 3804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(fSG, status); 3805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *rules = 3807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?" 3808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?" 3809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\p{Line_Break=NU}\\p{Line_Break=CM}*" 3810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*" 3811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "((\\p{Line_Break=CL}|\\p{Line_Break=CP})\\p{Line_Break=CM}*)?" 
3812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?"; 3813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNumberMatcher = new RegexMatcher( 3815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString(rules, -1, US_INV), 0, status); 3816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCharBI = BreakIterator::createCharacterInstance(Locale::getEnglish(), status); 3818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 3820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) deferredStatus = status; 3821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
//
//  setText
//      Point this monkey at a new piece of test text.
//
//      s   The text to analyze.  Only its address is stored (in fText), no copy
//          is made, so the caller must keep s alive for as long as next() is
//          going to be called against it.
//
//      The character break iterator and the number regex matcher are both
//      re-targeted at s as well.
//
void RBBILineMonkey::setText(const UnicodeString &s) {
    fText = &s;

    // The constructor reports any set-up failure through deferredStatus, and
    // next() refuses to run in that state.  Apply the same check here: after a
    // failed construction fCharBI / fNumberMatcher may be NULL or unusable,
    // so they must not be dereferenced.
    if (U_FAILURE(deferredStatus)) {
        return;
    }

    fCharBI->setText(s);
    fNumberMatcher->reset(s);
}
3830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 3832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// rule9Adjust 3833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Line Break TR rules 9 and 10 implementation. 
3834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// This deals with combining marks and other sequences that 3835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// that must be treated as if they were something other than what they actually are. 3836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 3837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// This is factored out into a separate function because it must be applied twice for 3838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// each potential break, once to the chars before the position being checked, then 3839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// again to the text following the possible break. 3840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 3841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBILineMonkey::rule9Adjust(int32_t pos, UChar32 *posChar, int32_t *nextPos, UChar32 *nextChar) { 3842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pos == -1) { 3843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Invalid initial position. Happens during the warmup iteration of the 3844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // main loop in next(). 3845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 3846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t nPos = *nextPos; 3849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 9 Keep combining sequences together. 3851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // advance over any CM class chars. 
Note that Line Break CM is different 3852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // from the normal Grapheme Extend property. 3853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!(fSP->contains(*posChar) || fBK->contains(*posChar) || *posChar==0x0d || 3854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *posChar==0x0a ||fNL->contains(*posChar) || fZW->contains(*posChar))) { 3855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 3856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *nextChar = fText->char32At(nPos); 3857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!fCM->contains(*nextChar)) { 3858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nPos = fText->moveIndex32(nPos, 1); 3861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 9 Treat X CM* as if it were x. 3866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No explicit action required. 
3867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 10 Treat any remaining combining mark as AL 3869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCM->contains(*posChar)) { 3870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *posChar = 0x41; // thisChar = 'A'; 3871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Push the updated nextPos and nextChar back to our caller. 3874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This only makes a difference if posChar got bigger by consuming a 3875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // combining sequence. 3876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *nextPos = nPos; 3877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *nextChar = fText->char32At(nPos); 3878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 3879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RBBILineMonkey::next(int32_t startPos) { 3883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 3884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t pos; // Index of the char following a potential break position 3885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 thisChar; // Character at above position "pos" 3886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t prevPos; // Index of the char preceding a potential break position 
3888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 prevChar; // Character at above position. Note that prevChar 3889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // and thisChar may not be adjacent because combining 3890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // characters between them will be ignored. 3891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t nextPos; // Index of the next character following pos. 3893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Usually skips over combining marks. 3894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t nextCPPos; // Index of the code point following "pos." 3895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // May point to a combining mark. 3896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t tPos; // temp value. 3897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 3898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(deferredStatus)) { 3900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 3901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (startPos >= fText->length()) { 3904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 3905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Initial values for loop. 
Loop will run the first time without finding breaks, 3909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // while the invalid values shift out and the "this" and 3910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // "prev" positions are filled in with good values. 3911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = prevPos = -1; // Invalid value, serves as flag for initial loop iteration. 3912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) thisChar = prevChar = 0; 3913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextPos = nextCPPos = startPos; 3914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Loop runs once per position in the test text, until a break position 3917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // is found. 3918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 3919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevPos = pos; 3920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevChar = thisChar; 3921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = nextPos; 3923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) thisChar = fText->char32At(pos); 3924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextCPPos = fText->moveIndex32(pos, 1); 3926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextPos = nextCPPos; 3927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule LB2 - Break at end of text. 
3929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pos >= fText->length()) { 3930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Rule LB 9 - adjust for combining sequences. 3934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We do this one out-of-order because the adjustment does not change anything 3935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // that would match rules LB 3 - LB 6, but after the adjustment, LB 3-6 do need to 3936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // be applied. 3937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rule9Adjust(prevPos, &prevChar, &pos, &thisChar); 3938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextCPPos = nextPos = fText->moveIndex32(pos, 1); 3939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = fText->char32At(nextPos); 3940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rule9Adjust(pos, &thisChar, &nextPos, &c); 3941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If the loop is still warming up - if we haven't shifted the initial 3943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // -1 positions out of prevPos yet - loop back to advance the 3944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // position in the input without any further looking for breaks. 
3945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (prevPos == -1) { 3946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 4 Always break after hard line breaks, 3950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fBK->contains(prevChar)) { 3951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 5 Break after CR, LF, NL, but not inside CR LF 3955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (prevChar == 0x0d && thisChar == 0x0a) { 3956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (prevChar == 0x0d || 3959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevChar == 0x0a || 3960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevChar == 0x85) { 3961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 6 Don't break before hard line breaks 3965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (thisChar == 0x0d || thisChar == 0x0a || thisChar == 0x85 || 3966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBK->contains(thisChar)) { 3967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 
3968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 7 Don't break before spaces or zero-width space. 3972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSP->contains(thisChar)) { 3973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fZW->contains(thisChar)) { 3977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 8 Break after zero width space 3981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fZW->contains(prevChar)) { 3982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 3983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 9, 10 Already done, at top of loop. 3986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 3987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 11 Do not break before or after WORD JOINER and related characters. 
3990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // x WJ 3991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // WJ x 3992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 3993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fWJ->contains(thisChar) || fWJ->contains(prevChar)) { 3994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 3995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 3996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 3997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 12 3998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // GL x 3999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fGL->contains(prevChar)) { 4000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 12a 4004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // [^SP BA HY] x GL 4005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!(fSP->contains(prevChar) || 4006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBA->contains(prevChar) || 4007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fHY->contains(prevChar) ) && fGL->contains(thisChar)) { 4008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 13 Don't break before closings. 
4014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // NU x CL, NU x CP and NU x IS are not matched here so that they will 4015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // fall into LB 17 and the more general number regular expression. 4016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 4017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((!fNU->contains(prevChar) && fCL->contains(thisChar)) || 4018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (!fNU->contains(prevChar) && fCP->contains(thisChar)) || 4019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fEX->contains(thisChar) || 4020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (!fNU->contains(prevChar) && fIS->contains(thisChar)) || 4021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (!fNU->contains(prevChar) && fSY->contains(thisChar))) { 4022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 14 Don't break after OP SP* 4026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Scan backwards, checking for this sequence. 4027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The OP char could include combining marks, so we actually check for 4028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // OP CM* SP* 4029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Another Twist: The Rule 67 fixes may have changed a SP CM 4030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // sequence into a ID char, so before scanning back through spaces, 4031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // verify that prevChar is indeed a space. 
The prevChar variable 4032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // may differ from fText[prevPos] 4033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tPos = prevPos; 4034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSP->contains(prevChar)) { 4035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tPos > 0 && fSP->contains(fText->char32At(tPos))) { 4036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tPos=fText->moveIndex32(tPos, -1); 4037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tPos > 0 && fCM->contains(fText->char32At(tPos))) { 4040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tPos=fText->moveIndex32(tPos, -1); 4041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fOP->contains(fText->char32At(tPos))) { 4043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 15 QU SP* x OP 4048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fOP->contains(thisChar)) { 4049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Scan backwards from prevChar to see if it is preceded by QU CM* SP* 4050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int tPos = prevPos; 4051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tPos>0 && fSP->contains(fText->char32At(tPos))) { 4052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tPos = fText->moveIndex32(tPos, -1); 
4053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tPos>0 && fCM->contains(fText->char32At(tPos))) { 4055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tPos = fText->moveIndex32(tPos, -1); 4056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fQU->contains(fText->char32At(tPos))) { 4058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 16 (CL | CP) SP* x NS 4065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Scan backwards for SP* CM* (CL | CP) 4066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fNS->contains(thisChar)) { 4067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int tPos = prevPos; 4068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tPos>0 && fSP->contains(fText->char32At(tPos))) { 4069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tPos = fText->moveIndex32(tPos, -1); 4070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tPos>0 && fCM->contains(fText->char32At(tPos))) { 4072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tPos = fText->moveIndex32(tPos, -1); 4073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCL->contains(fText->char32At(tPos)) || 
fCP->contains(fText->char32At(tPos))) { 4075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 17 B2 SP* x B2 4081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fB2->contains(thisChar)) { 4082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Scan backwards, checking for the B2 CM* SP* sequence. 4083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tPos = prevPos; 4084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSP->contains(prevChar)) { 4085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tPos > 0 && fSP->contains(fText->char32At(tPos))) { 4086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tPos=fText->moveIndex32(tPos, -1); 4087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tPos > 0 && fCM->contains(fText->char32At(tPos))) { 4090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tPos=fText->moveIndex32(tPos, -1); 4091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fB2->contains(fText->char32At(tPos))) { 4093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
4098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 18 break after space 4099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSP->contains(prevChar)) { 4100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 4101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 19 4104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // x QU 4105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // QU x 4106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fQU->contains(thisChar) || fQU->contains(prevChar)) { 4107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 20 Break around a CB 4111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCB->contains(thisChar) || fCB->contains(prevChar)) { 4112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 4113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 21 4116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fBA->contains(thisChar) || 4117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fHY->contains(thisChar) || 4118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNS->contains(thisChar) || 4119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBB->contains(prevChar) ) { 4120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
4122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 22 4124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fAL->contains(prevChar) && fIN->contains(thisChar)) || 4125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fID->contains(prevChar) && fIN->contains(thisChar)) || 4126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fIN->contains(prevChar) && fIN->contains(thisChar)) || 4127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fNU->contains(prevChar) && fIN->contains(thisChar)) ) { 4128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 23 ID x PO 4133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // AL x NU 4134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // NU x AL 4135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fID->contains(prevChar) && fPO->contains(thisChar)) || 4136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fAL->contains(prevChar) && fNU->contains(thisChar)) || 4137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fNU->contains(prevChar) && fAL->contains(thisChar)) ) { 4138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 24 Do not break between prefix and letters or ideographs. 
4142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // PR x ID 4143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // PR x AL 4144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // PO x AL 4145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fPR->contains(prevChar) && fID->contains(thisChar)) || 4146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fPR->contains(prevChar) && fAL->contains(thisChar)) || 4147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fPO->contains(prevChar) && fAL->contains(thisChar)) ) { 4148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 25 Numbers 4154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fNumberMatcher->lookingAt(prevPos, status)) { 4155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 4156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 4157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Matched a number. 
But could have been just a single digit, which would 4159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // not represent a "no break here" between prevChar and thisChar 4160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t numEndIdx = fNumberMatcher->end(status); // idx of first char following num 4161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (numEndIdx > pos) { 4162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Number match includes at least our two chars being checked 4163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (numEndIdx > nextPos) { 4164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Number match includes additional chars. Update pos and nextPos 4165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // so that next loop iteration will continue at the end of the number, 4166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // checking for breaks between last char in number & whatever follows. 
4167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = nextPos = numEndIdx; 4168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 4169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = fText->moveIndex32(pos, -1); 4170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) thisChar = fText->char32At(pos); 4171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while (fCM->contains(thisChar)); 4172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 26 Do not break a Korean syllable. 4179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fJL->contains(prevChar) && (fJL->contains(thisChar) || 4180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fJV->contains(thisChar) || 4181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fH2->contains(thisChar) || 4182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fH3->contains(thisChar))) { 4183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fJV->contains(prevChar) || fH2->contains(prevChar)) && 4187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fJV->contains(thisChar) || fJT->contains(thisChar))) { 4188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
4190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fJT->contains(prevChar) || fH3->contains(prevChar)) && 4192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fJT->contains(thisChar)) { 4193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 27 Treat a Korean Syllable Block the same as ID. 4197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fJL->contains(prevChar) || fJV->contains(prevChar) || 4198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fJT->contains(prevChar) || fH2->contains(prevChar) || fH3->contains(prevChar)) && 4199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fIN->contains(thisChar)) { 4200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fJL->contains(prevChar) || fJV->contains(prevChar) || 4203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fJT->contains(prevChar) || fH2->contains(prevChar) || fH3->contains(prevChar)) && 4204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPO->contains(thisChar)) { 4205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fPR->contains(prevChar) && (fJL->contains(thisChar) || fJV->contains(thisChar) || 4208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fJT->contains(thisChar) || fH2->contains(thisChar) || fH3->contains(thisChar))) { 4209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) continue; 4210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 28 Do not break between alphabetics ("at"). 4215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fAL->contains(prevChar) && fAL->contains(thisChar)) { 4216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 29 Do not break between numeric punctuation and alphabetics ("e.g."). 4220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fIS->contains(prevChar) && fAL->contains(thisChar)) { 4221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 30 Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation. 
4225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (AL | NU) x OP 4226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // CP x (AL | NU) 4227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((fAL->contains(prevChar) || fNU->contains(prevChar)) && fOP->contains(thisChar)) { 4228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCP->contains(prevChar) && (fAL->contains(thisChar) || fNU->contains(thisChar))) { 4231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // LB 31 Break everywhere else 4235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 4236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return pos; 4240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 4241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UVector *RBBILineMonkey::charClasses() { 4244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fSets; 4245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 4246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RBBILineMonkey::~RBBILineMonkey() { 
4249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSets; 4250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fBK; 4252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCR; 4253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fLF; 4254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCM; 4255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fNL; 4256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fWJ; 4257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fZW; 4258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fGL; 4259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCB; 4260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSP; 4261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fB2; 4262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fBA; 4263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fBB; 4264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fHY; 4265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fH2; 4266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fH3; 4267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCL; 4268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCP; 4269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fEX; 4270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fIN; 4271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fJL; 4272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fJV; 4273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fJT; 4274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) delete fNS; 4275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fOP; 4276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fQU; 4277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fIS; 4278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fNU; 4279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fPO; 4280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fPR; 4281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSY; 4282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fAI; 4283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fAL; 4284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fID; 4285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSA; 4286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSG; 4287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fXX; 4288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCharBI; 4290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fNumberMatcher; 4291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 4292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------- 4295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 4296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TestMonkey 4297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 4298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// params 4299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// seed=nnnnn 
Random number starting seed. 4300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Setting the seed allows errors to be reproduced. 4301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// loop=nnn Looping count. Controls running time. 4302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// -1: run forever. 4303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 0 or greater: run length. 4304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 4305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// type = char | word | line | sent | title 4306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 4307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------------------- 4308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t getIntParam(UnicodeString name, UnicodeString ¶ms, int32_t defaultVal) { 4310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t val = defaultVal; 4311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) name.append(" *= *(-?\\d+)"); 4312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 4313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher m(name, params, 0, status); 4314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (m.find()) { 4315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The param exists. Convert the string to an int. 
4316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char valString[100]; 4317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t paramLength = m.end(1, status) - m.start(1, status); 4318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (paramLength >= (int32_t)(sizeof(valString)-1)) { 4319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) paramLength = (int32_t)(sizeof(valString)-2); 4320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) params.extract(m.start(1, status), paramLength, valString, sizeof(valString)); 4322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) val = strtol(valString, NULL, 10); 4323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Delete this parameter from the params string. 4325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) m.reset(); 4326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) params = m.replaceFirst("", status); 4327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(U_SUCCESS(status)); 4329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return val; 4330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 4331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 4332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void testBreakBoundPreceding(RBBITest *test, UnicodeString ustr, 4334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *bi, 4335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expected[], 4336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expectedcount) 
4337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 4338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int count = 0; 4339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i = 0; 4340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int forward[50]; 4341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi->setText(ustr); 4342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) { 4343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) forward[count] = i; 4344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (count < expectedcount && expected[count] != i) { 4345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) test->errln("break forward test failed: expected %d but got %d", 4346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expected[count], i); 4347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 4348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count ++; 4350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (count != expectedcount) { 4352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printStringBreaks(ustr, expected, expectedcount); 4353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) test->errln("break forward test failed: missed %d match", 4354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectedcount - count); 4355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // testing boundaries 4358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 1; i < expectedcount; i ++) { 
4359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int j = expected[i - 1]; 4360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!bi->isBoundary(j)) { 4361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printStringBreaks(ustr, expected, expectedcount); 4362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) test->errln("isBoundary() failed. Expected boundary at position %d", j); 4363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (j = expected[i - 1] + 1; j < expected[i]; j ++) { 4366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (bi->isBoundary(j)) { 4367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printStringBreaks(ustr, expected, expectedcount); 4368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) test->errln("isBoundary() failed. 
Not expecting boundary at position %d", j); 4369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) { 4375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count --; 4376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (forward[count] != i) { 4377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printStringBreaks(ustr, expected, expectedcount); 4378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) test->errln("happy break test previous() failed: expected %d but got %d", 4379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) forward[count], i); 4380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 4381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (count != 0) { 4384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printStringBreaks(ustr, expected, expectedcount); 4385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) test->errln("break test previous() failed: missed a match"); 4386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // testing preceding 4390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i < expectedcount - 1; i ++) { 
4391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // int j = expected[i] + 1; 4392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int j = ustr.moveIndex32(expected[i], 1); 4393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (; j <= expected[i + 1]; j ++) { 4394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (bi->preceding(j) != expected[i]) { 4395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printStringBreaks(ustr, expected, expectedcount); 4396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) test->errln("preceding(): Not expecting boundary at position %d", j); 4397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 4402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestWordBreaks(void) 4404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 4405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS 4406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale locale("en"); 4408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 4409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status); 4410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *bi = BreakIterator::createWordInstance(locale, status); 4411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Replaced any C+J characters in a row with a random 
sequence of characters 4412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // of the same length to make our C+J segmentation not get in the way. 4413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const char *strlist[] = 4414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 4415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U000e0032\\u0097\\u0f94\\uc2d8\\u05f4\\U000e0031\\u060d", 4416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U000e0037\\u2666\\u1202\\u003a\\U000e0031\\u064d\\u0bea\\u091c\\U000e0040\\u003b", 4417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0589\\u3e99\\U0001d7f3\\U000e0074\\u1810\\u200e\\U000e004b\\u0027\\U000e0061\\u003a", 4418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u398c\\U000104a5\\U0001d173\\u102d\\u002e\\uca3b\\u002e\\u002c\\u5622", 4419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\uac00\\u3588\\u009c\\u0953\\u194b", 4420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e", 4421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0602\\u2019\\ua191\\U000e0063\\u0a4c\\u003a\\ub4b5\\u003a\\u827f\\u002e", 4422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2f1f\\u1634\\u05f8\\u0944\\u04f2\\u0cdf\\u1f9c\\u05f4\\u002e", 4423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044", 4424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u003b\\u024a\\u102e\\U000e0071\\u0600", 4425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2027\\U000e0067\\u0a47\\u00b7", 4426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0", 4427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027", 
4428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0589\\U000e006e\\u0a42\\U000104a5", 4429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0f66\\u2523\\u003a\\u0cae\\U000e0047\\u003a", 4430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7", 4431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0027\\u11af\\U000e0057\\u0602", 4432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U0001d7f2\\U000e007\\u0004\\u0589", 4433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b", 4434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U0001d7f2\\U000e007d\\u0004\\u0589", 4435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d", 4436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959", 4437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U000e0065\\u302c\\uc986\\u09ee\\U000e0068", 4438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7", 4439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0233\\U000e0020\\u0a69\\u0d6a", 4440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019", 4441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u18f4\\U000e0049\\u20e7\\u2027", 4442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ub315\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe", 4443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ua183\\u102d\\u0bec\\u003a", 4444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u17e8\\u06e7\\u002e\\u096d\\u003b", 4445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
"\\u003a\\u0e57\\u0fad\\u002e", 4446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de", 4447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a", 4448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U000e005d\\u2044\\u0731\\u0650\\u0061", 4449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u003a\\u0664\\u00b7\\u1fba", 4450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u003b\\u0027\\u00b7\\u47a3", 4451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2027\\U000e0067\\u0a42\\u00b7\\u4edf\\uc26c\\u003a\\u4186\\u041b", 4452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0027\\u003a\\U0001d70f\\U0001d7df\\ubf4a\\U0001d7f5\\U0001d177\\u003a\\u0e51\\u1058\\U000e0058\\u00b7\\u0673", 4453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\uc30d\\u002e\\U000e002c\\u0c48\\u003a\\ub5a1\\u0661\\u002c", 4454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 4455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int loop; 4456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 4457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); 4458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) { 4461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // printf("looping %d\n", loop); 4462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString ustr = CharsToUnicodeString(strlist[loop]); 4463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 
RBBICharMonkey monkey; 4464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIWordMonkey monkey; 4465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expected[50]; 4467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expectedcount = 0; 4468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) monkey.setText(ustr); 4470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 4471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) { 4472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expected[expectedcount ++] = i; 4473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testBreakBoundPreceding(this, ustr, bi, expected, expectedcount); 4476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 4478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 4479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 4480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestWordBoundary(void) 4482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 4483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // <data><>\u1d4a\u206e<?>\u0603\U0001d7ff<>\u2019<></data> 4484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale locale("en"); 4485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 4486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // BreakIterator *bi = 
BreakIterator::createCharacterInstance(locale, status); 4487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *bi = BreakIterator::createWordInstance(locale, status); 4488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar str[50]; 4489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const char *strlist[] = 4490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 4491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e", 4492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044", 4493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u003b\\u024a\\u102e\\U000e0071\\u0600", 4494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2027\\U000e0067\\u0a47\\u00b7", 4495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0", 4496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027", 4497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0589\\U000e006e\\u0a42\\U000104a5", 4498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u4f66\\ub523\\u003a\\uacae\\U000e0047\\u003a", 4499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7", 4500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0027\\u11af\\U000e0057\\u0602", 4501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U0001d7f2\\U000e007\\u0004\\u0589", 4502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b", 4503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U0001d7f2\\U000e007d\\u0004\\u0589", 4504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
"\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d", 4505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0e01\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959", 4506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U000e0065\\u302c\\u09ee\\U000e0068", 4507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7", 4508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0233\\U000e0020\\u0a69\\u0d6a", 4509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019", 4510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u58f4\\U000e0049\\u20e7\\u2027", 4511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe", 4512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ua183\\u102d\\u0bec\\u003a", 4513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u17e8\\u06e7\\u002e\\u096d\\u003b", 4514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u003a\\u0e57\\u0fad\\u002e", 4515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de", 4516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a", 4517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ua2a5\\u0038\\u2044\\u002e\\u0c67\\U000e003c\\u05f4\\u2027\\u05f4\\u2019", 4518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u003a\\u0664\\u00b7\\u1fba", 4519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u003b\\u0027\\u00b7\\u47a3", 4520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 4521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int loop; 4522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 
4523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); 4524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) { 4527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // printf("looping %d\n", loop); 4528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_unescape(strlist[loop], str, 20); 4529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString ustr(str); 4530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int forward[50]; 4531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int count = 0; 4532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi->setText(ustr); 4534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int prev = 0; 4535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 4536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) { 4537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) forward[count ++] = i; 4538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (i > prev) { 4539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int j; 4540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (j = prev + 1; j < i; j ++) { 4541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (bi->isBoundary(j)) { 4542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printStringBreaks(ustr, forward, count); 4543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("happy boundary test failed: expected %d not a boundary", 
4544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j); 4545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!bi->isBoundary(i)) { 4550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printStringBreaks(ustr, forward, count); 4551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("happy boundary test failed: expected %d a boundary", 4552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i); 4553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prev = i; 4556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 4559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 4560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestLineBreaks(void) 4562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 4563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS 4564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale locale("en"); 4565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 4566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *bi = BreakIterator::createLineInstance(locale, status); 4567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const int32_t STRSIZE = 50; 
4568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar str[STRSIZE]; 4569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const char *strlist[] = 4570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 4571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u300f\\ufdfc\\ub798\\u2011\\u2011\\u0020\\u0b43\\u002d\\ubeec\\ufffc", 4572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u24ba\\u2060\\u3405\\ub290\\u000d\\U000e0032\\ufe35\\u00a0\\u0361\\" 4573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "U000112ed\\u0f0c\\u000a\\u308e\\ua875\\u0085\\u114d", 4574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ufffc\\u3063\\u2e08\\u30e3\\u000d\\u002d\\u0ed8\\u002f\\U00011a57\\" 4575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "u2014\\U000e0105\\u118c\\u000a\\u07f8", 4576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0668\\u192b\\u002f\\u2034\\ufe39\\u00b4\\u0cc8\\u2571\\u200b\\u003f", 4577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ufeff\\ufffc\\u3289\\u0085\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123", 4578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ufe3c\\u201c\\u000d\\u2025\\u2007\\u201c\\u002d\\u20a0\\u002d\\u30a7\\u17a4", 4579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123", 4580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u002d\\uff1b\\u02c8\\u2029\\ufeff\\u0f22\\u2044\\ufe09\\u003a\\u096d\\u2009\\u000a\\u06f7\\u02cc\\u1019\\u2060", 4581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u1781\\u0b68\\u0f0c\\u3010\\u0085\\U00011f7a\\u0020\\u0dd6\\u200b\\U000e007a\\u000a\\u2060\\u2026\\u002f\\u2026\\u24dc\\u101e\\u2014\\u2007\\u30a5", 4582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
"\\u2770\\u0020\\U000e010f\\u0020\\u2060\\u000a\\u02cc\\u0bcc\\u060d\\u30e7\\u0f3b\\u002f", 4583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ufeff\\u0028\\u003b\\U00012fec\\u2010\\u0020\\u0004\\u200b\\u0020\\u275c\\u002f\\u17b1", 4584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u20a9\\u2014\\u00a2\\u31f1\\u002f\\u0020\\u05b8\\u200b\\u0cc2\\u003b\\u060d\\u02c8\\ua4e8\\u002f\\u17d5", 4585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u002d\\u136f\\uff63\\u0084\\ua933\\u2028\\u002d\\u431b\\u200b\\u20b0", 4586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\uade3\\u11d6\\u000a\\U0001107d\\u203a\\u201d\\ub070\\u000d\\u2024\\ufffc", 4587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\uff5b\\u101c\\u1806\\u002f\\u2213\\uff5f", 4588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2014\\u0a83\\ufdfc\\u003f\\u00a0\\u0020\\u000a\\u2991\\U0001d179\\u0020\\u201d\\U000125f6\\u0a67\\u20a7\\ufeff\\u043f", 4589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u169b\\U000e0130\\u002d\\u1041\\u0f3d\\u0abf\\u00b0\\u31fb\\u00a0\\u002d\\u02c8\\u003b", 4590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2762\\u1680\\u002d\\u2028\\u0027\\u01dc\\ufe56\\u003a\\u000a\\uffe6\\u29fd\\u0020\\u30ee\\u007c\\U0001d178\\u0af1\\u0085", 4591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u3010\\u200b\\u2029\\ufeff\\ufe6a\\u275b\\U000e013b\\ufe37\\u24d4\\u002d\\u1806\\u256a\\u1806\\u247c\\u0085\\u17ac", 4592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u99ab\\u0027\\u003b\\u2026\\ueaf0\\u0020\\u0020\\u0313\\u0020\\u3099\\uff09\\u208e\\u2011\\u2007\\u2060\\u000a\\u0020\\u0020\\u300b\\u0bf9", 4593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u1806\\u060d\\u30f5\\u00b4\\u17e9\\u2544\\u2028\\u2024\\u2011\\u20a3\\u002d\\u09cc\\u1782\\u000d\\uff6f\\u0025", 4594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) "\\u002f\\uf22e\\u1944\\ufe3d\\u0020\\u206f\\u31b3\\u2014\\u002d\\u2025\\u0f0c\\u0085\\u2763", 4595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u002f\\u2563\\u202f\\u0085\\u17d5\\u200b\\u0020\\U000e0043\\u2014\\u058a\\u3d0a\\ufe57\\u2035\\u2028\\u2029", 4596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u20ae\\U0001d169\\u9293\\uff1f\\uff1f\\u0021\\u2012\\u2039\\u0085\\u02cc\\u00a2\\u0020\\U000e01ab\\u3085\\u0f3a\\u1806\\u0f0c\\u1945\\u000a\\U0001d7e7", 4597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\uffe6\\u00a0\\u200b\\u0085\\u2116\\u255b\\U0001d7f7\\u178c\\ufffc", 4598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u02cc\\ufe6a\\u00a0\\u0021\\u002d\\u7490\\uec2e\\u200b\\u000a", 4599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\uec2e\\u200b\\u000a\\u0020\\u2028\\u2014\\u8945", 4600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u7490\\uec2e\\u200b\\u000a\\u0020\\u2028\\u2014", 4601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0020\\u2028\\u2014\\u8945\\u002c\\u005b", 4602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u000a\\ufe3c\\u201c\\u000d\\u2025\\u2007\\u201c\\u002d\\u20a0", 4603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2473\\u0e9d\\u0020\\u0085\\u000a\\ufe3c\\u201c\\u000d\\u2025", 4604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U0001d16e\\ufffc\\u2025\\u0021\\u002d", 4605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ufffc\\u301b\\u0fa5\\U000e0103\\u2060\\u208e\\u17d5\\u034f\\u1009\\u003a\\u180e\\u2009\\u3111", 4606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2014\\u0020\\u000a\\u17c5\\u24fc", 4607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ufffc\\u0020\\u2116\\uff6c\\u200b\\u0ac3\\U0001028f", 4608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
"\\uaeb0\\u0344\\u0085\\ufffc\\u073b\\u2010", 4609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ufeff\\u0589\\u0085\\u0eb8\\u30fd\\u002f\\u003a\\u2014\\ufe43", 4610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u09cc\\u256a\\u276d\\u002d\\u3085\\u000d\\u0e05\\u2028\\u0fbb", 4611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2034\\u00bb\\u0ae6\\u300c\\u0020\\u31f8\\ufffc", 4612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2116\\u0ed2\\uff64\\u02cd\\u2001\\u2060", 4613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u809d\\u2e02\\u0f0a\\uc48f\\u2540\\u000d\\u0cef\\u003a\\u0e4d" 4614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U000e0172\\U000e005c\\u17cf\\U00010ca6\\ufeff\\uf621\\u06f3\\uffe5" 4615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0ea2\\ufeff\\udcea\\u3085\\ua874\\u000a\\u0020\\u000b\\u200b", 4616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\ufe10\\u2060\\u1a5a\\u2060\\u17e4\\ufffc\\ubbe1\\ufe15\\u0020\\u00a0", 4617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u2060\\u2213\\u200b\\u2019\\uc2dc\\uff6a\\u1736\\u0085\\udb07", 4618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 4619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int loop; 4620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 4621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 4622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) { 4625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // printf("looping %d\n", loop); 4626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
int32_t t = u_unescape(strlist[loop], str, STRSIZE); 4627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t >= STRSIZE) { 4628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT(FALSE); 4629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString ustr(str); 4634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBILineMonkey monkey; 4635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(monkey.deferredStatus)) { 4636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const int EXPECTEDSIZE = 50; 4640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expected[EXPECTEDSIZE]; 4641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expectedcount = 0; 4642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) monkey.setText(ustr); 4644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 4645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) { 4646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expectedcount >= EXPECTEDSIZE) { 4647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT(expectedcount < EXPECTEDSIZE); 4648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
4650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expected[expectedcount ++] = i; 4651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testBreakBoundPreceding(this, ustr, bi, expected, expectedcount); 4654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 4656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 4657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 4658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestSentBreaks(void) 4660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 4661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS 4662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale locale("en"); 4663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 4664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *bi = BreakIterator::createSentenceInstance(locale, status); 4665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar str[200]; 4666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const char *strlist[] = 4667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 4668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "Now\ris\nthe\r\ntime\n\rfor\r\r", 4669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "This\n", 4670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "Hello! how are you? I'am fine. Thankyou. How are you doing? 
This\n costs $20,00,000.", 4671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\"Sentence ending with a quote.\" Bye.", 4672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) " (This is it). Testing the sentence iterator. \"This isn't it.\"", 4673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "Hi! This is a simple sample sentence. (This is it.) This is a simple sample sentence. \"This isn't it.\"", 4674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "Hi! This is a simple sample sentence. It does not have to make any sense as you can see. ", 4675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ", 4676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "Che la dritta via aveo smarrita. He said, that I said, that you said!! ", 4677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "Don't rock the boat.\\u2029Because I am the daddy, that is why. 
Not on my time (el timo.)!", 4678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U0001040a\\u203a\\u1217\\u2b23\\u000d\\uff3b\\u03dd\\uff57\\u0a69\\u104a\\ufe56\\ufe52" 4679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u3016\\U000e002f\\U000e0077\\u0662\\u1680\\u2984\\U000e006a\\u002e\\ua6ab\\u104a" 4680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u002e\\u019b\\u2005\\u002e\\u0477\\u0438\\u0085\\u0441\\u002e\\u5f61\\u202f" 4681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U0001019f\\uff08\\u27e8\\u055c\\u0352", 4682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u1f3e\\u004d\\u000a\\ua3e4\\U000e0023\\uff63\\u0c52\\u276d\\U0001d5de\\U0001d171" 4683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u0e38\\u17e5\\U00012fe6\\u0fa9\\u267f\\u1da3\\u0046\\u03ed\\udc72\\u0030" 4684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U0001d688\\u0b6d\\u0085\\u0c67\\u1f94\\u0c6c\\u9cb2\\u202a\\u180e\\u000b" 4685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u002e\\U000e005e\\u035b\\u061f\\u02c1\\U000e0025\\u0357\\u0969\\u202b" 4686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\U000130c5\\u0486\\U000e0123\\u2019\\u01bc\\u2006\\u11ad\\u180e\\u2e05" 4687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u10b7\\u013e\\u000a\\u002e\\U00013ea4" 4688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 4689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int loop; 4690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 4691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); 4692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) { 4695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_unescape(strlist[loop], str, (int32_t)(sizeof(str) / sizeof(str[0]))); 4696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString ustr(str); 4697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBISentMonkey monkey; 4699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(monkey.deferredStatus)) { 4700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 4701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const int EXPECTEDSIZE = 50; 4704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expected[EXPECTEDSIZE]; 4705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expectedcount = 0; 4706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) monkey.setText(ustr); 4708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 4709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) { 4710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expectedcount >= EXPECTEDSIZE) { 4711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT(expectedcount < EXPECTEDSIZE); 4712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expected[expectedcount ++] = i; 4715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) 4717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testBreakBoundPreceding(this, ustr, bi, expected, expectedcount); 4718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 4720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 4721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 4722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestMonkey(char *params) { 4724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS 4725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 4727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t loopCount = 500; 4728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t seed = 1; 4729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString breakType = "all"; 4730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale locale("en"); 4731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool useUText = FALSE; 4732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (quick == FALSE) { 4734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) loopCount = 10000; 4735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (params) { 4738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString p(params); 4739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) loopCount = getIntParam("loop", p, loopCount); 
4740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) seed = getIntParam("seed", p, seed); 4741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher m(" *type *= *(char|word|line|sent|title) *", p, 0, status); 4743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (m.find()) { 4744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakType = m.group(1, status); 4745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) m.reset(); 4746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p = m.replaceFirst("", status); 4747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher u(" *utext", p, 0, status); 4750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (u.find()) { 4751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) useUText = TRUE; 4752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u.reset(); 4753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p = u.replaceFirst("", status); 4754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // m.reset(p); 4758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (RegexMatcher(UNICODE_STRING_SIMPLE("\\S"), p, 0, status).find()) { 4759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Each option is stripped out of the option string as it is processed. 4760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // All options have been checked. The option string should have been completely emptied.. 
4761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char buf[100]; 4762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p.extract(buf, sizeof(buf), NULL, status); 4763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buf[sizeof(buf)-1] = 0; 4764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Unrecognized or extra parameter: %s\n", buf); 4765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (breakType == "char" || breakType == "all") { 4771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBICharMonkey m; 4772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status); 4773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 4774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RunMonkey(bi, m, "char", seed, loopCount, useUText); 4775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (breakType == "all" && useUText==FALSE) { 4776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Also run a quick test with UText when "all" is specified 4777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RunMonkey(bi, m, "char", seed, loopCount, TRUE); 4778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 4781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Creation of character break iterator failed %s", 
u_errorName(status)); 4782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 4784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (breakType == "word" || breakType == "all") { 4787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Word Break Monkey Test"); 4788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIWordMonkey m; 4789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *bi = BreakIterator::createWordInstance(locale, status); 4790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 4791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RunMonkey(bi, m, "word", seed, loopCount, useUText); 4792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 4794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Creation of word break iterator failed %s", u_errorName(status)); 4795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 4797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (breakType == "line" || breakType == "all") { 4800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Line Break Monkey Test"); 4801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBILineMonkey m; 4802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *bi = BreakIterator::createLineInstance(locale, status); 
4803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (loopCount >= 10) { 4804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) loopCount = loopCount / 5; // Line break runs slower than the others. 4805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 4807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RunMonkey(bi, m, "line", seed, loopCount, useUText); 4808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 4810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status)); 4811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 4813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (breakType == "sent" || breakType == "all" ) { 4816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Sentence Break Monkey Test"); 4817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBISentMonkey m; 4818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *bi = BreakIterator::createSentenceInstance(locale, status); 4819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (loopCount >= 10) { 4820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) loopCount = loopCount / 10; // Sentence runs slower than the other break types 4821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 4823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RunMonkey(bi, m, 
"sentence", seed, loopCount, useUText); 4824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 4826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status)); 4827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 4829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 4832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 4833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 4835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Run a RBBI monkey test. Common routine, for all break iterator types. 4836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Parameters: 4837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// bi - the break iterator to use 4838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// mk - MonkeyKind, abstraction for obtaining expected results 4839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// name - Name of test (char, word, etc.) 
for use in error messages 4840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// seed - Seed for starting random number generator (parameter from user) 4841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// numIterations 4842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 4843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, uint32_t seed, 4844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t numIterations, UBool useUText) { 4845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS 4847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const int32_t TESTSTRINGLEN = 500; 4849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString testText; 4850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t numCharClasses; 4851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVector *chClasses; 4852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expected[TESTSTRINGLEN*2 + 1]; 4853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int expectedCount = 0; 4854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char expectedBreaks[TESTSTRINGLEN*2 + 1]; 4855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char forwardBreaks[TESTSTRINGLEN*2 + 1]; 4856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char reverseBreaks[TESTSTRINGLEN*2+1]; 4857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char isBoundaryBreaks[TESTSTRINGLEN*2+1]; 4858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char followingBreaks[TESTSTRINGLEN*2+1]; 4859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char 
precedingBreaks[TESTSTRINGLEN*2+1]; 4860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 4861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int loopCount = 0; 4862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) m_seed = seed; 4864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) numCharClasses = mk.charClasses()->size(); 4866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) chClasses = mk.charClasses(); 4867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check for errors that occured during the construction of the MonkeyKind object. 4869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Can't report them where they occured because errln() is a method coming from intlTest, 4870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // and is not visible outside of RBBITest :-( 4871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(mk.deferredStatus)) { 4872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("status of \"%s\" in creation of RBBIMonkeyKind.", u_errorName(mk.deferredStatus)); 4873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Verify that the character classes all have at least one member. 
4877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i<numCharClasses; i++) { 4878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *s = (UnicodeSet *)chClasses->elementAt(i); 4879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (s == NULL || s->size() == 0) { 4880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Character Class #%d is null or of zero size.", i); 4881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 4882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (loopCount < numIterations || numIterations == -1) { 4886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (numIterations == -1 && loopCount % 10 == 0) { 4887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If test is running in an infinite loop, display a periodic tic so 4888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we can tell that it is making progress. 4889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "."); 4890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Save current random number seed, so that we can recreate the random numbers 4892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // for this loop iteration in event of an error. 4893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) seed = m_seed; 4894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Populate a test string with data. 
4896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testText.truncate(0); 4897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i<TESTSTRINGLEN; i++) { 4898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t aClassNum = m_rand() % numCharClasses; 4899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *classSet = (UnicodeSet *)chClasses->elementAt(aClassNum); 4900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t charIdx = m_rand() % classSet->size(); 4901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c = classSet->charAt(charIdx); 4902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c < 0) { // TODO: deal with sets containing strings. 4903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("c < 0"); 4904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 4905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) testText.append(c); 4907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Calculate the expected results for this test string. 
4910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mk.setText(testText); 4911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) memset(expectedBreaks, 0, sizeof(expectedBreaks)); 4912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectedBreaks[0] = 1; 4913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t breakPos = 0; 4914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectedCount = 0; 4915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 4916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPos = mk.next(breakPos); 4917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (breakPos == -1) { 4918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 4919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (breakPos > testText.length()) { 4921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("breakPos > testText.length()"); 4922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectedBreaks[breakPos] = 1; 4924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(expectedCount<testText.length()); 4925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expected[expectedCount ++] = breakPos; 4926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Find the break positions using forward iteration 4929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) memset(forwardBreaks, 0, sizeof(forwardBreaks)); 4930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (useUText) { 4931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = 
U_ZERO_ERROR; 4932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UText *testUText = utext_openReplaceable(NULL, &testText, &status); 4933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // testUText = utext_openUnicodeString(testUText, &testText, &status); 4934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi->setText(testUText, status); 4935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 4936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_close(testUText); // The break iterator does a shallow clone of the UText 4937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This UText can be closed immediately, so long as the 4938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // testText string continues to exist. 4939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 4940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi->setText(testText); 4941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=bi->first(); i != BreakIterator::DONE; i=bi->next()) { 4944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (i < 0 || i > testText.length()) { 4945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("%s break monkey test: Out of range value returned by breakIterator::next()", name); 4946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 4947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) forwardBreaks[i] = 1; 4949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Find the break positions 
using reverse iteration 4952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) memset(reverseBreaks, 0, sizeof(reverseBreaks)); 4953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=bi->last(); i != BreakIterator::DONE; i=bi->previous()) { 4954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (i < 0 || i > testText.length()) { 4955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("%s break monkey test: Out of range value returned by breakIterator::next()", name); 4956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 4957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reverseBreaks[i] = 1; 4959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Find the break positions using isBoundary() tests. 4962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) memset(isBoundaryBreaks, 0, sizeof(isBoundaryBreaks)); 4963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT((int32_t)sizeof(isBoundaryBreaks) > testText.length()); 4964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i<=testText.length(); i++) { 4965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) isBoundaryBreaks[i] = bi->isBoundary(i); 4966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Find the break positions using the following() function. 
4970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // printf("."); 4971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) memset(followingBreaks, 0, sizeof(followingBreaks)); 4972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lastBreakPos = 0; 4973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) followingBreaks[0] = 1; 4974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i<testText.length(); i++) { 4975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPos = bi->following(i); 4976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (breakPos <= i || 4977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPos < lastBreakPos || 4978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPos > testText.length() || 4979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (breakPos > lastBreakPos && lastBreakPos > i)) { 4980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("%s break monkey test: " 4981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "Out of range value returned by BreakIterator::following().\n" 4982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "Random seed=%d index=%d; following returned %d; lastbreak=%d", 4983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) name, seed, i, breakPos, lastBreakPos); 4984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 4985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) followingBreaks[breakPos] = 1; 4987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lastBreakPos = breakPos; 4988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 4989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Find the break positions using 
the preceding() function. 4991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) memset(precedingBreaks, 0, sizeof(precedingBreaks)); 4992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lastBreakPos = testText.length(); 4993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) precedingBreaks[testText.length()] = 1; 4994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=testText.length(); i>0; i--) { 4995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPos = bi->preceding(i); 4996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (breakPos >= i || 4997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakPos > lastBreakPos || 4998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (breakPos < 0 && testText.getChar32Start(i)>0) || 4999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (breakPos < lastBreakPos && lastBreakPos < testText.getChar32Start(i)) ) { 5000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("%s break monkey test: " 5001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "Out of range value returned by BreakIterator::preceding().\n" 5002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "index=%d; prev returned %d; lastBreak=%d" , 5003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) name, i, breakPos, lastBreakPos); 5004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (breakPos >= 0 && breakPos < (int32_t)sizeof(precedingBreaks)) { 5005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) precedingBreaks[i] = 2; // Forces an error. 
5006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 5008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (breakPos >= 0) { 5009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) precedingBreaks[breakPos] = 1; 5010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lastBreakPos = breakPos; 5012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Compare the expected and actual results. 5016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i<=testText.length(); i++) { 5017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *errorType = NULL; 5018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (forwardBreaks[i] != expectedBreaks[i]) { 5019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorType = "next()"; 5020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (reverseBreaks[i] != forwardBreaks[i]) { 5021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorType = "previous()"; 5022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (isBoundaryBreaks[i] != expectedBreaks[i]) { 5023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorType = "isBoundary()"; 5024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (followingBreaks[i] != expectedBreaks[i]) { 5025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorType = "following()"; 5026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (precedingBreaks[i] != expectedBreaks[i]) { 
5027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorType = "preceding()"; 5028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (errorType != NULL) { 5032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Format a range of the test text that includes the failure as 5033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // a data item that can be included in the rbbi test data file. 5034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Start of the range is the last point where expected and actual results 5036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // both agreed that there was a break position. 5037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int startContext = i; 5038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t count = 0; 5039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 5040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (startContext==0) { break; } 5041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) startContext --; 5042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expectedBreaks[startContext] != 0) { 5043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (count == 2) break; 5044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count ++; 5045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // End of range is two expected breaks past the start position. 
5049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int endContext = i + 1; 5050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int ci; 5051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (ci=0; ci<2; ci++) { // Number of items to include in error text. 5052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 5053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (endContext >= testText.length()) {break;} 5054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expectedBreaks[endContext-1] != 0) { 5055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (count == 0) break; 5056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count --; 5057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) endContext ++; 5059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Format looks like "<data>\\\uabcd\uabcd\\\U0001abcd...</data>" 5063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString errorText = "<data>"; 5064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /***if (strcmp(errorType, "next()") == 0) { 5065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) startContext = 0; 5066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) endContext = testText.length(); 5067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printStringBreaks(testText, expected, expectedCount); 5069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }***/ 5070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
5071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (ci=startContext; ci<endContext;) { 5072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString hexChars("0123456789abcdef"); 5073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 5074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int bn; 5075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = testText.char32At(ci); 5076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ci == i) { 5077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This is the location of the error. 5078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorText.append("<?>"); 5079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (expectedBreaks[ci] != 0) { 5080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This a non-error expected break position. 5081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorText.append("\\"); 5082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c < 0x10000) { 5084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorText.append("\\u"); 5085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (bn=12; bn>=0; bn-=4) { 5086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorText.append(hexChars.charAt((c>>bn)&0xf)); 5087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 5089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorText.append("\\U"); 5090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (bn=28; bn>=0; bn-=4) { 5091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorText.append(hexChars.charAt((c>>bn)&0xf)); 5092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) } 5093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ci = testText.moveIndex32(ci, 1); 5095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorText.append("\\"); 5097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorText.append("</data>\n"); 5098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Output the error 5100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char charErrorTxt[500]; 5101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 5102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorText.extract(charErrorTxt, sizeof(charErrorTxt), NULL, status); 5103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charErrorTxt[sizeof(charErrorTxt)-1] = 0; 5104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("%s break monkey test error. %s. Operation = %s; Random seed = %d; buf Idx = %d\n%s", 5105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) name, (expectedBreaks[i]? 
"break expected but not found" : "break found but not expected"), 5106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorType, seed, i, charErrorTxt); 5107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 5108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) loopCount++; 5112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 5114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 5115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Bug 5532. UTF-8 based UText fails in dictionary code. 5118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// This test checks the initial patch, 5119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// which is to just keep it from crashing. Correct word boundaries 5120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// await a proper fix to the dictionary code. 5121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 5122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestBug5532(void) { 5123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Text includes a mixture of Thai and Latin. 
5124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const unsigned char utf8Data[] = { 5125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0xE0u, 0xB8u, 0x82u, 0xE0u, 0xB8u, 0xB2u, 0xE0u, 0xB8u, 0xA2u, 0xE0u, 5126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0xB9u, 0x80u, 0xE0u, 0xB8u, 0x84u, 0xE0u, 0xB8u, 0xA3u, 0xE0u, 0xB8u, 5127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0xB7u, 0xE0u, 0xB9u, 0x88u, 0xE0u, 0xB8u, 0xADu, 0xE0u, 0xB8u, 0x87u, 5128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0xE0u, 0xB9u, 0x80u, 0xE0u, 0xB8u, 0xA5u, 0xE0u, 0xB9u, 0x88u, 0xE0u, 5129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0xB8u, 0x99u, 0xE0u, 0xB8u, 0x8Bu, 0xE0u, 0xB8u, 0xB5u, 0xE0u, 0xB8u, 5130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x94u, 0xE0u, 0xB8u, 0xB5u, 0x20u, 0x73u, 0x69u, 0x6Du, 0x20u, 0x61u, 5131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x75u, 0x64u, 0x69u, 0x6Fu, 0x2Fu, 0x20u, 0x4Du, 0x4Fu, 0x4Fu, 0x4Eu, 5132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0x20u, 0x65u, 0x63u, 0x6Cu, 0x69u, 0x70u, 0x73u, 0x65u, 0x20u, 0xE0u, 5133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0xB8u, 0xA3u, 0xE0u, 0xB8u, 0xB2u, 0xE0u, 0xB8u, 0x84u, 0xE0u, 0xB8u, 5134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0xB2u, 0x20u, 0x34u, 0x37u, 0x30u, 0x30u, 0x20u, 0xE0u, 0xB8u, 0xA2u, 5135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0xE0u, 0xB8u, 0xB9u, 0xE0u, 0xB9u, 0x82u, 0xE0u, 0xB8u, 0xA3u, 0x00}; 5136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 5138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UText utext=UTEXT_INITIALIZER; 5139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_openUTF8(&utext, (const char *)utf8Data, -1, &status); 
5140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 5141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *bi = BreakIterator::createWordInstance(Locale("th"), status); 5143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 5144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 5145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi->setText(&utext, status); 5146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT_SUCCESS(status); 5147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t breakCount = 0; 5149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t previousBreak = -1; 5150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (bi->first(); bi->next() != BreakIterator::DONE; breakCount++) { 5151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // For now, just make sure that the break iterator doesn't hang. 
5152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT(previousBreak < bi->current()); 5153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) previousBreak = bi->current(); 5154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TEST_ASSERT(breakCount > 0); 5156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 5157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 5158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_close(&utext); 5159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 5160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 5163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// TestDebug - A place-holder test for debugging purposes. 5164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// For putting in fragments of other tests that can be invoked 5165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// for tracing without a lot of unwanted extra stuff happening. 
5166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 5167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RBBITest::TestDebug(void) { 5168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 5169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 5170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int pos = 0; 5171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int ruleStatus = 0; 5172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator* bi = 5174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); 5175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::Locale("th"), status); 5176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (RuleBasedBreakIterator *)BreakIterator::createSentenceInstance(Locale::getDefault(), status); 5177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString s("\\u2008\\u002e\\udc6a\\u37cd\\u71d0\\u2048\\U000e006a\\u002e\\u0046\\ufd3f\\u000a\\u002e"); 5178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // UnicodeString s("Aaa. 
Bcd"); 5179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s = s.unescape(); 5180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bi->setText(s); 5181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool r = bi->isBoundary(8); 5182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("%s", r?"true":"false"); 5183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 5184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = bi->last(); 5185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 5186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // ruleStatus = bi->getRuleStatus(); 5187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("%d\t%d\n", pos, ruleStatus); 5188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = bi->previous(); 5189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while (pos != BreakIterator::DONE); 5190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 5191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 5192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 5193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 5194