1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/******************************************************************** 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT: 3103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Copyright (c) 1999-2012, International Business Machines Corporation and 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************/ 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/************************************************************************ 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date Name Description 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 12/15/99 Madhu Creation. 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 01/12/2000 Madhu Updated for changed API and added new tests 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru************************************************************************/ 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "utypeinfo.h" // for 'typeid' to work 1327f654740f2a26ad62a5c155af9199af9e69b889claireho 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/brkiter.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/rbbi.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utf16.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/schriter.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 26103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if !UCONFIG_NO_REGULAR_EXPRESSIONS 27103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/regex.h" 28103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utext.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "intltest.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "rbbitst.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h> 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvectr32.h" 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h> 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h> 3954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "unicode/numfmt.h" 4054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "unicode/uscript.h" 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define TEST_ASSERT(x) {if (!(x)) { \ 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Failure in file %s, line %d", __FILE__, __LINE__);}} 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 45c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru#define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \ 466d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(errcode));}} 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------- 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// runIndexedTest 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------- 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 53103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 54103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// Note: Before adding new tests to this file, check whether the desired test data can 55103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// simply be added to the file testdata/rbbitest.txt. In most cases it can, 56103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// it's much less work than writing a new test, diagnostic output in the event of failures 57103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// is good, and the test data file is shared with ICU4J, so eventually the test 58103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// will run there as well, without additional effort. 
59103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* params ) 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) logln("TestSuite RuleBasedBreakIterator: "); 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (index) { 6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: name = "TestBug4153072"; 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestBug4153072(); break; 6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else 6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 0: name = "skip"; 7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 73103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 1: name = "skip"; 74103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: name = "TestStatusReturn"; 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestStatusReturn(); break; 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: name = "TestUnicodeFiles"; 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestUnicodeFiles(); break; 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: name = "TestEmptyString"; 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestEmptyString(); break; 
8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else 8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 3: case 4: name = "skip"; 8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 5: name = "TestGetAvailableLocales"; 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestGetAvailableLocales(); break; 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 6: name = "TestGetDisplayName"; 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestGetDisplayName(); break; 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 7: name = "TestEndBehaviour"; 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestEndBehaviour(); break; 97103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 8: case 9: case 10: name = "skip"; 98103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 11: name = "TestWordBreaks"; 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestWordBreaks(); break; 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 12: name = "TestWordBoundary"; 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestWordBoundary(); break; 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 13: name = "TestLineBreaks"; 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestLineBreaks(); break; 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 14: name = "TestSentBreaks"; 
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestSentBreaks(); break; 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 15: name = "TestExtended"; 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestExtended(); break; 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else 11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 7: case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15: name = "skip"; 11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 114103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO 11554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius case 16: 11654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius name = "TestMonkey"; if(exec) TestMonkey(params); break; 117103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#else 11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 16: 119103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius name = "skip"; break; 120103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif 12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 17: name = "TestBug3818"; 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestBug3818(); break; 12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else 126103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 17: name = "skip"; 12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 130103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 18: name = "skip"; 131103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 
break; 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 19: name = "TestDebug"; 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(exec) TestDebug(); break; 13454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius case 20: name = "skip"; 13554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius break; 13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 21: name = "TestBug5775"; 139b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (exec) TestBug5775(); break; 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else 141103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 21: name = "skip"; 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 144103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 145103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 22: name = "skip"; 146103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho case 23: name = "TestDictRules"; 14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (exec) TestDictRules(); break; 149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho case 24: name = "TestBug5532"; 15027f654740f2a26ad62a5c155af9199af9e69b889claireho if (exec) TestBug5532(); break; 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: name = ""; break; //needed to end loop 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------- 
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// class BITestData Holds a set of Break iterator test data and results 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Includes 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// - the string data to be broken 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// - a vector of the expected break positions. 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// - a vector of source line numbers for the data, 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// (to help see where errors occured.) 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// - The expected break tag values. 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// - Vectors of actual break positions and tag values. 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// - Functions for comparing actual with expected and 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// reporting errors. 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------- 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass BITestData { 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString fDataToBreak; 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector fExpectedBreakPositions; 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector fExpectedTags; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector fLineNum; 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector fActualBreakPositions; // Test Results. 
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector fActualTags; 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BITestData(UErrorCode &status); 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void addDataChunk(const char *data, int32_t tag, int32_t lineNum, UErrorCode status); 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void checkResults(const char *heading, RBBITest *test); 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void err(const char *heading, RBBITest *test, int32_t expectedIdx, int32_t actualIdx); 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void clearResults(); 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Constructor. 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruBITestData::BITestData(UErrorCode &status) 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru: fExpectedBreakPositions(status), fExpectedTags(status), fLineNum(status), fActualBreakPositions(status), 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fActualTags(status) 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// addDataChunk. Add a section (non-breaking) piece if data to the test data. 
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// The macro form collects the line number, which is helpful 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// when tracking down failures. 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// A null data item is inserted at the start of each test's data 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// to put the starting zero into the data list. The position saved for 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// each non-null item is its ending position. 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define ADD_DATACHUNK(td, data, tag, status) td.addDataChunk(data, tag, __LINE__, status); 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BITestData::addDataChunk(const char *data, int32_t tag, int32_t lineNum, UErrorCode status) { 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) {return;} 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (data != NULL) { 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDataToBreak.append(CharsToUnicodeString(data)); 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fExpectedBreakPositions.addElement(fDataToBreak.length(), status); 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fExpectedTags.addElement(tag, status); 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fLineNum.addElement(lineNum, status); 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// checkResults. Compare the actual and expected break positions, report any differences. 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BITestData::checkResults(const char *heading, RBBITest *test) { 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectedIndex = 0; 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t actualIndex = 0; 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we've run through both the expected and actual results vectors, we're done. 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // break out of the loop. 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedIndex >= fExpectedBreakPositions.size() && 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru actualIndex >= fActualBreakPositions.size()) { 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedIndex >= fExpectedBreakPositions.size()) { 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err(heading, test, expectedIndex-1, actualIndex); 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru actualIndex++; 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (actualIndex >= 
fActualBreakPositions.size()) { 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err(heading, test, expectedIndex, actualIndex-1); 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedIndex++; 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fActualBreakPositions.elementAti(actualIndex) != fExpectedBreakPositions.elementAti(expectedIndex)) { 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err(heading, test, expectedIndex, actualIndex); 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try to resync the positions of the indices, to avoid a rash of spurious erros. 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fActualBreakPositions.elementAti(actualIndex) < fExpectedBreakPositions.elementAti(expectedIndex)) { 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru actualIndex++; 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedIndex++; 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fActualTags.elementAti(actualIndex) != fExpectedTags.elementAti(expectedIndex)) { 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test->errln("%s, tag mismatch. 
Test Line = %d, expected tag=%d, got %d", 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru heading, fLineNum.elementAt(expectedIndex), 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fExpectedTags.elementAti(expectedIndex), fActualTags.elementAti(actualIndex)); 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru actualIndex++; 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedIndex++; 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// err - An error was found. Report it, along with information about where the 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// incorrectly broken test data appeared in the source file. 
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BITestData::err(const char *heading, RBBITest *test, int32_t expectedIdx, int32_t actualIdx) 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expected = fExpectedBreakPositions.elementAti(expectedIdx); 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t actual = fActualBreakPositions.elementAti(actualIdx); 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t o = 0; 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t line = fLineNum.elementAti(expectedIdx); 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedIdx > 0) { 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The line numbers are off by one because a premature break occurs somewhere 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // within the previous item, rather than at the start of the current (expected) item. 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We want to report the offset of the unexpected break from the start of 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // this previous item. 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru o = actual - fExpectedBreakPositions.elementAti(expectedIdx-1); 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (actual < expected) { 284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru test->errln("%s unexpected break at offset %d in test item from line %d. 
actual break: %d expected break: %d", heading, o, line, actual, expected); 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru test->errln("%s Failed to find break at end of item from line %d. actual break: %d expected break: %d", heading, line, actual, expected); 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BITestData::clearResults() { 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fActualBreakPositions.removeAllElements(); 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fActualTags.removeAllElements(); 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------------- 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// RBBITest constructor and destructor 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------------- 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRBBITest::RBBITest() { 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRBBITest::~RBBITest() { 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------------------------- 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test for status {tag} return value from break rules. 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// TODO: a more thorough test. 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------------------------- 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestStatusReturn() { 317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString rulesString1("$Letters = [:L:];\n" 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "$Numbers = [:N:];\n" 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "$Letters+{1};\n" 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "$Numbers+{2};\n" 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Help\\ {4}/me\\!;\n" 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[^$Letters $Numbers];\n" 323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "!.*;\n", -1, US_INV); 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString testString1 = "abc123..abc Help me Help me!"; 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 01234567890123456789012345678 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t bounds1[] = {0, 3, 6, 7, 8, 11, 12, 16, 17, 19, 20, 25, 27, 28, -1}; 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t 
brkStatus[] = {0, 1, 2, 0, 0, 1, 0, 1, 0, 1, 0, 4, 1, 0, -1}; 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError parseError; 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(status)) { 3346d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("FAIL : in construction - %s", u_errorName(status)); 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t pos; 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0; 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi->setText(testString1); 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (pos=bi->first(); pos!= BreakIterator::DONE; pos=bi->next()) { 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pos != bounds1[i]) { 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: expected break at %d, got %d\n", bounds1[i], pos); 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int tag = bi->getRuleStatus(); 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (tag != brkStatus[i]) { 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: break at %d, expected tag %d, got tag %d\n", pos, brkStatus[i], tag); 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i++; 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void printStringBreaks(UnicodeString ustr, int expected[], 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int expectedcount) 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char name[100]; 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("code alpha extend alphanum type word sent line name\n"); 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int j; 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (j = 0; j < ustr.length(); j ++) { 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedcount > 0) { 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int k; 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (k = 0; k < expectedcount; k ++) { 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (j == expected[k]) { 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("------------------------------------------------ %d\n", 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru j); 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = ustr.char32At(j); 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c > 0xffff) { 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru j ++; 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_charName(c, U_UNICODE_CHAR_NAME, name, 100, &status); 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%7x %5d %6d %8d %4s %4s %4s %4s %s\n", (int)c, 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_isUAlphabetic(c), 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND), 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_isalnum(c), 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_getPropertyValueName(UCHAR_GENERAL_CATEGORY, 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_charType(c), 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SHORT_PROPERTY_NAME), 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_getPropertyValueName(UCHAR_WORD_BREAK, 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_getIntPropertyValue(c, 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_WORD_BREAK), 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SHORT_PROPERTY_NAME), 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_getPropertyValueName(UCHAR_SENTENCE_BREAK, 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_getIntPropertyValue(c, 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_SENTENCE_BREAK), 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SHORT_PROPERTY_NAME), 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_getPropertyValueName(UCHAR_LINE_BREAK, 
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_getIntPropertyValue(c, 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_LINE_BREAK), 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SHORT_PROPERTY_NAME), 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name); 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestBug3818() { 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Four Thai words... 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar thaiWordData[] = { 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48, 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x0E43,0x0E2B,0x0E0D,0x0E48, 0x0E43,0x0E2B,0x0E0D,0x0E48, 0 }; 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString thaiStr(thaiWordData); 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RuleBasedBreakIterator* bi = 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale("th"), status); 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status) || bi == NULL) { 4146d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Fail at file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status)); 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi->setText(thaiStr); 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t startOfSecondWord = bi->following(1); 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (startOfSecondWord != 4) { 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Fail at file %s, line %d expected start of word at 4, got %d", 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru __FILE__, __LINE__, startOfSecondWord); 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru startOfSecondWord = bi->following(0); 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (startOfSecondWord != 4) { 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Fail at file %s, line %d expected start of word at 4, got %d", 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru __FILE__, __LINE__, startOfSecondWord); 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------- 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// generalIteratorTest Given a break iterator and a set of test data, 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Run the tests and report the results. 
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------- 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::generalIteratorTest(RuleBasedBreakIterator& bi, BITestData &td) 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi.setText(td.fDataToBreak); 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testFirstAndNext(bi, td); 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testLastAndPrevious(bi, td); 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testFollowing(bi, td); 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testPreceding(bi, td); 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testIsBoundary(bi, td); 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doMultipleSelectionTest(bi, td); 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// testFirstAndNext. Run the iterator forwards in the obvious first(), next() 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// kind of loop. 
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::testFirstAndNext(RuleBasedBreakIterator& bi, BITestData &td) 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t p; 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lastP = -1; 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t tag; 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Test first and next"); 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi.setText(td.fDataToBreak); 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.clearResults(); 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (p=bi.first(); p!=RuleBasedBreakIterator::DONE; p=bi.next()) { 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualBreakPositions.addElement(p, status); // Save result. 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tag = bi.getRuleStatus(); 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualTags.addElement(tag, status); 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (p <= lastP) { 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the iterator is not making forward progress, stop. 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No need to raise an error here, it'll be detected in the normal check of results. 
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lastP = p; 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.checkResults("testFirstAndNext", this); 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// TestLastAndPrevious. Run the iterator backwards, starting with last(). 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::testLastAndPrevious(RuleBasedBreakIterator& bi, BITestData &td) 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t p; 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lastP = 0x7ffffffe; 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t tag; 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru logln("Test last and previous"); 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi.setText(td.fDataToBreak); 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.clearResults(); 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (p=bi.last(); p!=RuleBasedBreakIterator::DONE; p=bi.previous()) { 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save break position. 
Insert it at start of vector of results, shoving 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // already-saved results further towards the end. 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualBreakPositions.insertElementAt(p, 0, status); 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // bi.previous(); // TODO: Why does this fix things up???? 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // bi.next(); 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tag = bi.getRuleStatus(); 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualTags.insertElementAt(tag, 0, status); 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (p >= lastP) { 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the iterator is not making progress, stop. 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No need to raise an error here, it'll be detected in the normal check of results. 
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lastP = p; 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.checkResults("testLastAndPrevious", this); 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::testFollowing(RuleBasedBreakIterator& bi, BITestData &td) 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t p; 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t tag; 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lastP = -2; // A value that will never be returned as a break position. 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // cannot be -1; that is returned for DONE. 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("testFollowing():"); 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi.setText(td.fDataToBreak); 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.clearResults(); 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save the starting point, since we won't get that out of following. 
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p = bi.first(); 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualBreakPositions.addElement(p, status); // Save result. 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tag = bi.getRuleStatus(); 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualTags.addElement(tag, status); 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i <= td.fDataToBreak.length()+1; i++) { 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p = bi.following(i); 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (p != lastP) { 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (p == RuleBasedBreakIterator::DONE) { 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We've reached a new break position. Save it. 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualBreakPositions.addElement(p, status); // Save result. 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tag = bi.getRuleStatus(); 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualTags.addElement(tag, status); 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lastP = p; 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The loop normally exits by means of the break in the middle. 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make sure that the index was at the correct position for the break iterator to have 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // returned DONE. 
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i != td.fDataToBreak.length()) { 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("testFollowing(): iterator returned DONE prematurely."); 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Full check of all results. 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.checkResults("testFollowing", this); 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::testPreceding(RuleBasedBreakIterator& bi, BITestData &td) { 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t p; 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t tag; 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lastP = 0x7ffffffe; 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("testPreceding():"); 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi.setText(td.fDataToBreak); 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.clearResults(); 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p = bi.last(); 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualBreakPositions.addElement(p, status); 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tag = 
bi.getRuleStatus(); 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualTags.addElement(tag, status); 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = td.fDataToBreak.length(); i>=-1; i--) { 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p = bi.preceding(i); 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (p != lastP) { 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (p == RuleBasedBreakIterator::DONE) { 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We've reached a new break position. Save it. 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualBreakPositions.insertElementAt(p, 0, status); 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lastP = p; 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tag = bi.getRuleStatus(); 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualTags.insertElementAt(tag, 0, status); 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The loop normally exits by means of the break in the middle. 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make sure that the index was at the correct position for the break iterator to have 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // returned DONE. 
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i != 0) { 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("testPreceding(): iterator returned DONE prematurely."); 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Full check of all results. 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.checkResults("testPreceding", this); 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::testIsBoundary(RuleBasedBreakIterator& bi, BITestData &td) { 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t tag; 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("testIsBoundary():"); 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi.setText(td.fDataToBreak); 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.clearResults(); 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i <= td.fDataToBreak.length(); i++) { 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bi.isBoundary(i)) { 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualBreakPositions.addElement(i, status); // Save result. 
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tag = bi.getRuleStatus(); 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.fActualTags.addElement(tag, status); 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru td.checkResults("testIsBoundary: ", this); 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::doMultipleSelectionTest(RuleBasedBreakIterator& iterator, BITestData &td) 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iterator.setText(td.fDataToBreak); 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RuleBasedBreakIterator* testIterator =(RuleBasedBreakIterator*)iterator.clone(); 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t offset = iterator.first(); 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t testOffset; 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count = 0; 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("doMultipleSelectionTest text of length: %d", td.fDataToBreak.length()); 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*testIterator != iterator) 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("clone() or operator!= failed: two clones compared unequal"); 
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testOffset = testIterator->first(); 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testOffset = testIterator->next(count); 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (offset != testOffset) 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset); 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (offset != RuleBasedBreakIterator::DONE) { 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count++; 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offset = iterator.next(); 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (offset != RuleBasedBreakIterator::DONE && *testIterator == iterator) { 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("operator== failed: Two unequal iterators compared equal. count=%d offset=%d", count, offset); 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (count > 10000 || offset == -1) { 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("operator== failed too many times. 
Stopping test."); 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (offset == -1) { 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Does (RuleBasedBreakIterator::DONE == -1)?"); 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (offset != RuleBasedBreakIterator::DONE); 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // now do it backwards... 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offset = iterator.last(); 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count = 0; 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testOffset = testIterator->last(); 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testOffset = testIterator->next(count); // next() with a negative arg is same as previous 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (offset != testOffset) 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("next(n) and next() not returning consistent results: for step ") + count + ", next(n) returned " + testOffset + " and next() had " + offset); 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (offset != RuleBasedBreakIterator::DONE) { 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count--; 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offset = 
iterator.previous(); 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (offset != RuleBasedBreakIterator::DONE); 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testIterator; 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------- 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// other tests 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------- 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestEmptyString() 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString text = ""; 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BITestData x(status); 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ADD_DATACHUNK(x, "", 0, status); // Break at start of data 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RuleBasedBreakIterator* bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 6976d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, 
"Failed to create the BreakIterator for default locale in TestEmptyString. - %s", u_errorName(status)); 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru generalIteratorTest(*bi, x); 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestGetAvailableLocales() 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t locCount = 0; 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const Locale* locList = BreakIterator::getAvailableLocales(locCount); 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (locCount == 0) 7106d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("getAvailableLocales() returned an empty list!"); 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Just make sure that it's returning good memory. 
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < locCount; ++i) { 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(locList[i].getName()); 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//Testing the BreakIterator::getDisplayName() function 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestGetDisplayName() 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result; 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator::getDisplayName(Locale::getUS(), result); 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (Locale::getDefault() == Locale::getUS() && result != "English (United States)") 7256d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("BreakIterator::getDisplayName() failed: expected \"English (United States)\", got \"" 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + result); 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator::getDisplayName(Locale::getFrance(), Locale::getUS(), result); 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (result != "French (France)") 7306d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("BreakIterator::getDisplayName() failed: expected \"French (France)\", got \"" 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + result); 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test End Behaviour 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @bug 4068137 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestEndBehaviour() 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString testString("boo."); 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *wb = BreakIterator::createWordInstance(Locale::getDefault(), status); 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 7446d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Failed to create the BreakIterator for default locale in TestEndBehaviour. 
- %s", u_errorName(status)); 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru wb->setText(testString); 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (wb->first() != 0) 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Didn't get break at beginning of string."); 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (wb->next() != 3) 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Didn't get break before period in \"boo.\""); 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (wb->current() != 4 && wb->next() != 4) 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Didn't get break at end of string."); 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete wb; 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @bug 4153072 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestBug4153072() { 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *iter = BreakIterator::createWordInstance(Locale::getDefault(), status); 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 7656d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Failed to create the BreakIterator for default locale in TestBug4153072 - %s", u_errorName(status)); 
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str("...Hello, World!..."); 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t begin = 3; 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t end = str.length() - 3; 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool onBoundary; 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru StringCharacterIterator* textIterator = new StringCharacterIterator(str, begin, end, begin); 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter->adoptText(textIterator); 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int index; 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note: with the switch to UText, there is no way to restrict the 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // iteration range to begin at an index other than zero. 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // String character iterators created with a non-zero bound are 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // treated by RBBI as being empty. 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (index = -1; index < begin + 1; ++index) { 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru onBoundary = iter->isBoundary(index); 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index == 0? 
!onBoundary : onBoundary) { 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Didn't handle isBoundary correctly with offset = " + index + 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " and begin index = " + begin); 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete iter; 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test for problem reported by Ashok Matoria on 9 July 2007 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// One.<kSoftHyphen><kSpace>Two. 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Sentence break at start (0) and then on calling next() it breaks at 796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 'T' of "Two". Now, at this point if I do next() and 797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// then previous(), it breaks at <kSOftHyphen> instead of 'T' of "Two". 
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestBug5775() { 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *bi = BreakIterator::createSentenceInstance(Locale::getEnglish(), status); 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 8036d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru if (U_FAILURE(status)) { 8046d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru return; 8056d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru } 8066d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru// Check for status first for better handling of no data errors. 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(bi != NULL); 8086d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru if (bi == NULL) { 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 8116d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru 812c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString s("One.\\u00ad Two.", -1, US_INV); 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 01234 56789 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s = s.unescape(); 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi->setText(s); 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int pos = bi->next(); 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(pos == 6); 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = bi->next(); 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(pos == 10); 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = bi->previous(); 
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(pos == 6); 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// RBBITest::Extended Run RBBI Tests from an external test data file 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustruct TestParams { 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *bi; 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString dataToBreak; 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector32 *expectedBreaks; 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector32 *srcLine; 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector32 *srcCol; 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::executeTest(TestParams *t) { 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t bp; 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prevBP; 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->bi == NULL) { 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->bi->setText(t->dataToBreak); 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Run the iterator forward 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevBP = -1; 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (bp = t->bi->first(); bp != BreakIterator::DONE; bp = t->bi->next()) { 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prevBP == bp) { 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fail for lack of forward progress. 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Forward Iteration, no forward progress. Break Pos=%4d File line,col=%4d,%4d", 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check that there were we didn't miss an expected break between the last one 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and this one. 
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=prevBP+1; i<bp; i++) { 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->expectedBreaks->elementAti(i) != 0) { 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int expected[] = {0, i}; 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printStringBreaks(t->dataToBreak, expected, 2); 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Forward Iteration, break expected, but not found. Pos=%4d File line,col= %4d,%4d", 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check that the break we did find was expected 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->expectedBreaks->elementAti(bp) == 0) { 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int expected[] = {0, bp}; 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printStringBreaks(t->dataToBreak, expected, 2); 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Forward Iteration, break found, but not expected. Pos=%4d File line,col= %4d,%4d", 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The break was expected. 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check that the {nnn} tag value is correct. 
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectedTagVal = t->expectedBreaks->elementAti(bp); 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedTagVal == -1) { 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedTagVal = 0; 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t line = t->srcLine->elementAti(bp); 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (rs != expectedTagVal) { 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Incorrect status for forward break. Pos=%4d File line,col= %4d,%4d.\n" 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " Actual, Expected status = %4d, %4d", 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal); 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevBP = bp; 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Verify that there were no missed expected breaks after the last one found 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=prevBP+1; i<t->expectedBreaks->size(); i++) { 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->expectedBreaks->elementAti(i) != 0) { 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Forward Iteration, break expected, but not found. 
Pos=%4d File line,col= %4d,%4d", 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Run the iterator backwards, verify that the same breaks are found. 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevBP = t->dataToBreak.length()+2; // start with a phony value for the last break pos seen. 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (bp = t->bi->last(); bp != BreakIterator::DONE; bp = t->bi->previous()) { 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prevBP == bp) { 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fail for lack of progress. 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Reverse Iteration, no progress. Break Pos=%4d File line,col=%4d,%4d", 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check that there were we didn't miss an expected break between the last one 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and this one. (UVector returns zeros for index out of bounds.) 
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=prevBP-1; i>bp; i--) { 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->expectedBreaks->elementAti(i) != 0) { 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Reverse Itertion, break expected, but not found. Pos=%4d File line,col= %4d,%4d", 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check that the break we did find was expected 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->expectedBreaks->elementAti(bp) == 0) { 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Reverse Itertion, break found, but not expected. Pos=%4d File line,col= %4d,%4d", 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp)); 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The break was expected. 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check that the {nnn} tag value is correct. 
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectedTagVal = t->expectedBreaks->elementAti(bp); 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedTagVal == -1) { 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedTagVal = 0; 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int line = t->srcLine->elementAti(bp); 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus(); 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (rs != expectedTagVal) { 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Incorrect status for reverse break. Pos=%4d File line,col= %4d,%4d.\n" 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " Actual, Expected status = %4d, %4d", 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal); 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevBP = bp; 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Verify that there were no missed breaks prior to the last one found 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=prevBP-1; i>=0; i--) { 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->expectedBreaks->elementAti(i) != 0) { 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Forward Itertion, break expected, but not found. 
Pos=%4d File line,col= %4d,%4d", 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i, t->srcLine->elementAti(i), t->srcCol->elementAti(i)); 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestExtended() { 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Locale locale(""); 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString rules; 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TestParams tp; 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.bi = NULL; 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.expectedBreaks = new UVector32(status); 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine = new UVector32(status); 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol = new UVector32(status); 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 974c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher localeMatcher(UNICODE_STRING_SIMPLE("<locale *([\\p{L}\\p{Nd}_]*) *>"), 0, status); 9756d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru if (U_FAILURE(status)) { 9766d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(status)); 9776d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru } 
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Open and read the test data file. 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *testDataDirectory = IntlTest::getSourceTestData(status); 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char testFileName[1000]; 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) { 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Can't open test data. Path too long."); 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(testFileName, testDataDirectory); 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(testFileName, "rbbitst.txt"); 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int len; 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status); 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; /* something went wrong, error already output */ 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Put the test data into a UnicodeString 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString testString(FALSE, testFile, len); 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru enum EParseState{ 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru PARSE_COMMENT, 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru PARSE_TAG, 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru PARSE_DATA, 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru PARSE_NUM 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parseState = PARSE_TAG; 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru EParseState savedState = PARSE_TAG; 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar CH_LF = 0x0a; 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar CH_CR = 0x0d; 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar CH_HASH = 0x23; 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*static const UChar CH_PERIOD = 0x2e;*/ 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar CH_LT = 0x3c; 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar CH_GT = 0x3e; 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar CH_BACKSLASH = 0x5c; 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar CH_BULLET = 0x2022; 
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lineNum = 1; 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t colStart = 0; 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t column = 0; 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t charIdx = 0; 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t tagValue = 0; // The numeric value of a <nnn> tag. 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (charIdx = 0; charIdx < len; ) { 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c = testString.charAt(charIdx); 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx++; 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == CH_CR && charIdx<len && testString.charAt(charIdx) == CH_LF) { 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // treat CRLF as a unit 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = CH_LF; 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx++; 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == CH_LF || c == CH_CR) { 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lineNum++; 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru colStart = charIdx; 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru column = charIdx - colStart + 1; 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru switch (parseState) { 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case PARSE_COMMENT: 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == 0x0a || c == 0x0d) { 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parseState = savedState; 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case PARSE_TAG: 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == CH_HASH) { 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parseState = PARSE_COMMENT; 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru savedState = PARSE_TAG; 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u_isUWhiteSpace(c)) { 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testString.compare(charIdx-1, 6, "<word>") == 0) { 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete tp.bi; 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.bi = BreakIterator::createWordInstance(locale, status); 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx += 5; 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testString.compare(charIdx-1, 6, "<char>") == 0) { 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete 
tp.bi; 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.bi = BreakIterator::createCharacterInstance(locale, status); 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx += 5; 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testString.compare(charIdx-1, 6, "<line>") == 0) { 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete tp.bi; 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.bi = BreakIterator::createLineInstance(locale, status); 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx += 5; 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testString.compare(charIdx-1, 6, "<sent>") == 0) { 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete tp.bi; 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.bi = NULL; 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.bi = BreakIterator::createSentenceInstance(locale, status); 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx += 5; 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testString.compare(charIdx-1, 7, "<title>") == 0) { 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete tp.bi; 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.bi = BreakIterator::createTitleInstance(locale, status); 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx += 6; 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // <locale loc_name> 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localeMatcher.reset(testString); 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (localeMatcher.lookingAt(charIdx-1, status)) { 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString localeName = localeMatcher.group(1, status); 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char localeName8[100]; 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localeName.extract(0, localeName.length(), localeName8, sizeof(localeName8), 0); 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru locale = Locale::createFromName(localeName8); 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx += localeMatcher.group(0, status).length(); 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testString.compare(charIdx-1, 6, "<data>") == 0) { 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parseState = PARSE_DATA; 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx += 5; 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.dataToBreak = ""; 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.expectedBreaks->removeAllElements(); 1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol ->removeAllElements(); 1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine->removeAllElements(); 1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("line %d: Tag expected in test file.", lineNum); 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parseState = PARSE_COMMENT; 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru savedState = PARSE_DATA; 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto end_test; // Stop the test. 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case PARSE_DATA: 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == CH_BULLET) { 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t breakIdx = tp.dataToBreak.length(); 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.expectedBreaks->setSize(breakIdx+1); 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.expectedBreaks->setElementAt(-1, breakIdx); 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine->setSize(breakIdx+1); 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine->setElementAt(lineNum, breakIdx); 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol ->setSize(breakIdx+1); 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol ->setElementAt(column, breakIdx); 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testString.compare(charIdx-1, 7, "</data>") == 0) { 
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Add final entry to mappings from break location to source file position. 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Need one extra because last break position returned is after the 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // last char in the data, not at the last char. 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine->addElement(lineNum, status); 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol ->addElement(column, status); 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parseState = PARSE_TAG; 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx += 6; 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RUN THE TEST! 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru executeTest(&tp); 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (testString.compare(charIdx-1, 3, UNICODE_STRING_SIMPLE("\\N{")) == 0) { 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Named character, e.g. \N{COMBINING GRAVE ACCENT} 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Get the code point from the name and insert it into the test data. 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (Damn, no API takes names in Unicode !!! 
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we've got to take it back to char *) 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nameEndIdx = testString.indexOf((UChar)0x7d/*'}'*/, charIdx); 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nameLength = nameEndIdx - (charIdx+2); 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char charNameBuf[200]; 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 theChar = -1; 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (nameEndIdx != -1) { 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testString.extract(charIdx+2, nameLength, charNameBuf, sizeof(charNameBuf)); 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charNameBuf[sizeof(charNameBuf)-1] = 0; 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru theChar = u_charFromName(U_UNICODE_CHAR_NAME, charNameBuf, &status); 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru theChar = -1; 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (theChar == -1) { 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Error in named character in test file at line %d, col %d", 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lineNum, column); 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Named code point was recognized. Insert it 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // into the test data. 
1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.dataToBreak.append(theChar); 1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tp.dataToBreak.length() > tp.srcLine->size()) { 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine->addElement(lineNum, status); 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol ->addElement(column, status); 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (nameEndIdx > charIdx) { 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx = nameEndIdx+1; 1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testString.compare(charIdx-1, 2, "<>") == 0) { 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx++; 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t breakIdx = tp.dataToBreak.length(); 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.expectedBreaks->setSize(breakIdx+1); 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.expectedBreaks->setElementAt(-1, breakIdx); 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine->setSize(breakIdx+1); 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine->setElementAt(lineNum, breakIdx); 
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol ->setSize(breakIdx+1); 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol ->setElementAt(column, breakIdx); 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == CH_LT) { 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tagValue = 0; 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parseState = PARSE_NUM; 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == CH_HASH && column==3) { // TODO: why is column off so far? 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parseState = PARSE_COMMENT; 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru savedState = PARSE_DATA; 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == CH_BACKSLASH) { 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check for \ at end of line, a line continuation. 
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Advance over (discard) the newline 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 cp = testString.char32At(charIdx); 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp == CH_CR && charIdx<len && testString.charAt(charIdx+1) == CH_LF) { 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have a CR LF 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Need an extra increment of the input ptr to move over both of them 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx++; 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp == CH_LF || cp == CH_CR) { 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lineNum++; 1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru colStart = charIdx; 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx++; 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Let unescape handle the back slash. 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp = testString.unescapeAt(charIdx); 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp != -1) { 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Escape sequence was recognized. Insert the char 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // into the test data. 
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.dataToBreak.append(cp); 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tp.dataToBreak.length() > tp.srcLine->size()) { 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine->addElement(lineNum, status); 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol ->addElement(column, status); 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Not a recognized backslash escape sequence. 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Take the next char as a literal. 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: Should this be an error? 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = testString.charAt(charIdx); 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charIdx = testString.moveIndex32(charIdx, 1); 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Normal, non-escaped data char. 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.dataToBreak.append(c); 1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save the mapping from offset in the data to line/column numbers in 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the original input file. Will be used for better error messages only. 
1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If there's an expected break before this char, the slot in the mapping 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // vector will already be set for this char; don't overwrite it. 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (tp.dataToBreak.length() > tp.srcLine->size()) { 1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine->addElement(lineNum, status); 1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol ->addElement(column, status); 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case PARSE_NUM: 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We are parsing an expected numeric tag value, like <1234>, 1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // within a chunk of data. 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u_isUWhiteSpace(c)) { 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == CH_GT) { 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Finished the number. Add the info to the expected break data, 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and switch parse state back to doing plain data. 
1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parseState = PARSE_DATA; 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (tagValue == 0) { 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tagValue = -1; 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t breakIdx = tp.dataToBreak.length(); 1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.expectedBreaks->setSize(breakIdx+1); 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.expectedBreaks->setElementAt(tagValue, breakIdx); 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine->setSize(breakIdx+1); 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcLine->setElementAt(lineNum, breakIdx); 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol ->setSize(breakIdx+1); 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tp.srcCol ->setElementAt(column, breakIdx); 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u_isdigit(c)) { 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tagValue = tagValue*10 + u_charDigitValue(c); 1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Syntax Error in test file at line %d, col %d", 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lineNum, column); 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parseState = PARSE_COMMENT; 
1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto end_test; // Stop the test 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho dataerrln("ICU Error %s while parsing test file at line %d.", 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_errorName(status), lineNum); 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto end_test; // Stop the test 1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruend_test: 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete tp.bi; 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete tp.expectedBreaks; 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete tp.srcLine; 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete tp.srcCol; 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete [] testFile; 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru//------------------------------------------------------------------------------- 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 132450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// TestDictRules create a break iterator from source rules that includes a 132550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// dictionary range. Regression for bug #7130. Source rules 132650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// do not declare a break iterator type (word, line, sentence, etc. 132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// but the dictionary code, without a type, would loop. 132850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 132950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------- 133050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RBBITest::TestDictRules() { 133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *rules = "$dictionary = [a-z]; \n" 133250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "!!forward; \n" 133350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "$dictionary $dictionary; \n" 133450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "!!reverse; \n" 133550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "$dictionary $dictionary; \n"; 133650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *text = "aa"; 133750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError parseError; 133950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 134050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RuleBasedBreakIterator bi(rules, parseError, status); 134150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 134250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString utext = text; 134350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bi.setText(utext); 134450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t 
position; 134550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loops; 134650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (loops = 0; loops<10; loops++) { 134750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho position = bi.next(); 134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (position == RuleBasedBreakIterator::DONE) { 134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho TEST_ASSERT(loops == 1); 135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Error creating RuleBasedBreakIterator: %s", u_errorName(status)); 135550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 135650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 135850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//------------------------------------------------------------------------------- 136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// ReadAndConvertFile Read a text data file, convert it to UChars, and 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// return the datain one big UChar * buffer, which the caller must delete. 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// parameters: 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// fileName: the name of the file, with no directory part. The test data directory 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// is assumed. 
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// ulen an out parameter, receives the actual length (in UChars) of the file data. 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// encoding The file encoding. If the file contains a BOM, that will override the encoding 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// specified here. The BOM, if it exists, will be stripped from the returned data. 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Pass NULL for the system default encoding. 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// status 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// returns: 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// The file data, converted to UChar. 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// The caller must delete this when done with 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// delete [] theBuffer; 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// TODO: This is a clone of RegexTest::ReadAndConvertFile. 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Move this function to some common place. 
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar *RBBITest::ReadAndConvertFile(const char *fileName, int &ulen, const char *encoding, UErrorCode &status) { 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *retPtr = NULL; 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *fileBuf = NULL; 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverter* conv = NULL; 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FILE *f = NULL; 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ulen = 0; 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retPtr; 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Open the file. 
1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru f = fopen(fileName, "rb"); 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (f == 0) { 13986d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("Error opening test data file %s\n", fileName); 1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_FILE_ACCESS_ERROR; 1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Read it in 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int fileSize; 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int amt_read; 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fseek( f, 0, SEEK_END); 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileSize = ftell(f); 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileBuf = new char[fileSize]; 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fseek(f, 0, SEEK_SET); 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru amt_read = fread(fileBuf, 1, fileSize, f); 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (amt_read != fileSize || fileSize <= 0) { 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Error reading test data file."); 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto cleanUpAndReturn; 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 
1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Look for a Unicode Signature (BOM) on the data just read 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t signatureLength; 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char * fileBufC; 1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* bomEncoding; 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileBufC = fileBuf; 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bomEncoding = ucnv_detectUnicodeSignature( 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileBuf, fileSize, &signatureLength, &status); 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(bomEncoding!=NULL ){ 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileBufC += signatureLength; 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileSize -= signatureLength; 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru encoding = bomEncoding; 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Open a converter to take the rule file to UTF-16 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru conv = ucnv_open(encoding, &status); 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto cleanUpAndReturn; 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert the rules to UChar. 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Preflight first to determine required buffer size. 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ulen = ucnv_toUChars(conv, 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL, // dest, 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, // destCapacity, 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileBufC, 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileSize, 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &status); 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (status == U_BUFFER_OVERFLOW_ERROR) { 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Buffer Overflow is expected from the preflight operation. 
1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retPtr = new UChar[ulen+1]; 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_toUChars(conv, 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retPtr, // dest, 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ulen+1, 1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileBufC, 1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileSize, 1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &status); 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucleanUpAndReturn: 1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fclose(f); 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete []fileBuf; 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_close(conv); 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status)); 1471b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete []retPtr; 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retPtr = 0; 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ulen = 0; 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retPtr; 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------------------- 1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Run tests from each of the boundary test data files distributed by the Unicode Consortium 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------------------- 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestUnicodeFiles() { 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RuleBasedBreakIterator *bi; 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 148927f654740f2a26ad62a5c155af9199af9e69b889claireho bi = (RuleBasedBreakIterator *)BreakIterator::createCharacterInstance(Locale::getEnglish(), status); 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru runUnicodeTestData("GraphemeBreakTest.txt", bi); 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 149627f654740f2a26ad62a5c155af9199af9e69b889claireho bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru runUnicodeTestData("WordBreakTest.txt", bi); 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 150327f654740f2a26ad62a5c155af9199af9e69b889claireho bi = (RuleBasedBreakIterator *)BreakIterator::createSentenceInstance(Locale::getEnglish(), status); 1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru runUnicodeTestData("SentenceBreakTest.txt", bi); 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 151027f654740f2a26ad62a5c155af9199af9e69b889claireho bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 1513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru runUnicodeTestData("LineBreakTest.txt", bi); 1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------------------- 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Run tests from one of the 
boundary test data files distributed by the Unicode Consortium 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------------------- 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi) { 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 1526103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // TODO(andy): Match line break behavior to Unicode 6.0 and remove this time bomb. Ticket #7270 152754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius UBool isTicket7270Fixed = isICUVersionAtLeast(52, 1); 1528103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UBool isLineBreak = 0 == strcmp(fileName, "LineBreakTest.txt"); 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Open and read the test data file, put it into a UnicodeString. 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *testDataDirectory = IntlTest::getSourceTestData(status); 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char testFileName[1000]; 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testDataDirectory == NULL || strlen(testDataDirectory) >= sizeof(testFileName)) { 15376d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("Can't open test data. 
Path too long."); 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(testFileName, testDataDirectory); 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(testFileName, fileName); 1542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru logln("Opening data file %s\n", fileName); 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int len; 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *testFile = ReadAndConvertFile(testFileName, len, "UTF-8", status); 1547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (status != U_FILE_ACCESS_ERROR) { 1548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TEST_ASSERT(testFile != NULL); 1550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status) || testFile == NULL) { 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; /* something went wrong, error already output */ 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString testFileAsString(TRUE, testFile, len); 1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Parse the test data file using a regular expression. 
1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Each kind of token is recognized in its own capture group; what type of item was scanned 1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // is identified by which group had a match. 1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Caputure Group # 1 2 3 4 5 1562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Parses this item: divide x hex digits comment \n unrecognized \n 1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString tokenExpr("[ \t]*(?:(\\u00F7)|(\\u00D7)|([0-9a-fA-F]+)|((?:#.*?)?$.)|(.*?$.))", -1, US_INV); 1565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru RegexMatcher tokenMatcher(tokenExpr, testFileAsString, UREGEX_MULTILINE | UREGEX_DOTALL, status); 1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString testString; 1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector32 breakPositions(status); 1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int lineNumber = 1; 1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scan through each test case, building up the string to be broken in testString, 1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and the positions that should be boundaries in the breakPositions vector. 
1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 157827f654740f2a26ad62a5c155af9199af9e69b889claireho int spin = 0; 1579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (tokenMatcher.find()) { 158027f654740f2a26ad62a5c155af9199af9e69b889claireho if(tokenMatcher.hitEnd()) { 158127f654740f2a26ad62a5c155af9199af9e69b889claireho /* Shouldnt Happen(TM). This means we didn't find the symbols we were looking for. 158227f654740f2a26ad62a5c155af9199af9e69b889claireho This occurred when the text file was corrupt (wasn't marked as UTF-8) 158327f654740f2a26ad62a5c155af9199af9e69b889claireho and caused an infinite loop here on EBCDIC systems! 158427f654740f2a26ad62a5c155af9199af9e69b889claireho */ 158527f654740f2a26ad62a5c155af9199af9e69b889claireho fprintf(stderr,"FAIL: hit end of file %s for the %8dth time- corrupt data file?\r", fileName, ++spin); 158627f654740f2a26ad62a5c155af9199af9e69b889claireho // return; 158727f654740f2a26ad62a5c155af9199af9e69b889claireho } 1588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (tokenMatcher.start(1, status) >= 0) { 1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scanned a divide sign, indicating a break position in the test data. 1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testString.length()>0) { 1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru breakPositions.addElement(testString.length(), status); 1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (tokenMatcher.start(2, status) >= 0) { 1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scanned an 'x', meaning no break at this position in the test data 1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Nothing to be done here. 
1597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (tokenMatcher.start(3, status) >= 0) { 1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scanned Hex digits. Convert them to binary, append to the character data string. 1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &hexNumber = tokenMatcher.group(3, status); 1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int length = hexNumber.length(); 1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (length<=8) { 1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buf[10]; 1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hexNumber.extract (0, length, buf, sizeof(buf), US_INV); 1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = (UChar32)strtol(buf, NULL, 16); 1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c<=0x10ffff) { 1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testString.append(c); 1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Error: Unicode Character value out of range. 
\'%s\', line %d.\n", 1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileName, lineNumber); 1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Syntax Error: Hex Unicode Character value must have no more than 8 digits at \'%s\', line %d.\n", 1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fileName, lineNumber); 1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (tokenMatcher.start(4, status) >= 0) { 1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scanned to end of a line, possibly skipping over a comment in the process. 1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the line from the file contained test data, run the test now. 1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testString.length() > 0) { 162254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius// TODO(andy): Remove this time bomb code. Note: Failing line numbers may change when updating to new Unicode data. 162354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius// Rule 8 162454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius// ZW SP* <break> 162554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius// is not yet implemented. 
162654dcd9b6a06071f647dac967e9e267abb9410720Craig Corneliusif (!(isLineBreak && !isTicket7270Fixed && (5198 == lineNumber || 162754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 5202 == lineNumber || 162854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 5214 == lineNumber || 162954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 5246 == lineNumber || 163054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 5298 == lineNumber || 163154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 5302 == lineNumber ))) { 1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkUnicodeTestCase(fileName, lineNumber, testString, &breakPositions, bi); 163327f654740f2a26ad62a5c155af9199af9e69b889claireho} 1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Clear out this test case. 1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The string and breakPositions vector will be refilled as the next 1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test case is parsed. 1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testString.remove(); 1640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru breakPositions.removeAllElements(); 1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lineNumber++; 1642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scanner catchall. Something unrecognized appeared on the line. 
1644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char token[16]; 1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString uToken = tokenMatcher.group(0, status); 1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uToken.extract(0, uToken.length(), token, (uint32_t)sizeof(token)); 1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru token[sizeof(token)-1] = 0; 1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Syntax error in test data file \'%s\', line %d. Scanning \"%s\"\n", fileName, lineNumber, token); 1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Clean up, in preparation for continuing with the next line. 1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testString.remove(); 1652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru breakPositions.removeAllElements(); 1653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lineNumber++; 1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete [] testFile; 1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru//-------------------------------------------------------------------------------------------- 1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// checkUnicodeTestCase() Run one test case from one of the Unicode Consortium 1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// test data files. Do only a simple, forward-only check - 1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// this test is mostly to check that ICU and the Unicode 1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// data agree with each other. 1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------------------- 1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::checkUnicodeTestCase(const char *testFileName, int lineNumber, 1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &testString, // Text data to be broken 1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector32 *breakPositions, // Positions where breaks should be found. 1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RuleBasedBreakIterator *bi) { 1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t pos; // Break Position in the test string 1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectedI = 0; // Index of expected break position in the vector of expected results. 
1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectedPos; // Expected break position (index into test string) 1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi->setText(testString); 1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = bi->first(); 1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = bi->next(); 1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (pos != BreakIterator::DONE) { 1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedI >= breakPositions->size()) { 1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Test file \"%s\", line %d, unexpected break found at position %d", 1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testFileName, lineNumber, pos); 1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedPos = breakPositions->elementAti(expectedI); 1692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pos < expectedPos) { 1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Test file \"%s\", line %d, unexpected break found at position %d", 1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testFileName, lineNumber, pos); 1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pos > expectedPos) { 1698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("Test file \"%s\", line %d, failed to find expected break at position %d", 1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testFileName, lineNumber, 
expectedPos); 1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = bi->next(); 1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedI++; 1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pos==BreakIterator::DONE && expectedI<breakPositions->size()) { 1707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("Test file \"%s\", line %d, failed to find expected break at position %d", 1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testFileName, lineNumber, breakPositions->elementAti(expectedI)); 1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// classs RBBIMonkeyKind 1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Monkey Test for Break Iteration 1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Abstract interface class. Concrete derived classes independently 1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// implement the break rules for different iterator types. 
1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// The Monkey Test itself uses doesn't know which type of break iterator it is 1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// testing, but works purely in terms of the interface defined here. 1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------------- 1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass RBBIMonkeyKind { 1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Return a UVector of UnicodeSets, representing the character classes used 1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for this type of iterator. 1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UVector *charClasses() = 0; 1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Set the test text on which subsequent calls to next() will operate 1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual void setText(const UnicodeString &s) = 0; 1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Find the next break postion, starting from the prev break position, or from zero. 1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Return -1 after reaching end of string. 
1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual int32_t next(int32_t i) = 0; 1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual ~RBBIMonkeyKind(); 1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode deferredStatus; 1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprotected: 1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBIMonkeyKind(); 1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRBBIMonkeyKind::RBBIMonkeyKind() { 1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru deferredStatus = U_ZERO_ERROR; 1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRBBIMonkeyKind::~RBBIMonkeyKind() { 1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------------------------- 1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Random Numbers. 
//                     Similar to standard lib rand() and srand()
//                     Not using library to
//                       1.  Get same results on all platforms.
//                       2.  Get access to current seed, to more easily reproduce failures.
//
//---------------------------------------------------------------------------------------

// Current state of the generator.
static uint32_t m_seed = 1;

// Advance the linear congruential generator one step and return a value in [0, 32767],
// taken from bits 16..30 of the new state.
static uint32_t m_rand()
{
    m_seed = m_seed * 1103515245 + 12345;
    const uint32_t upperBits = m_seed >> 16;    // same as dividing by 65536
    return upperBits & 0x7fff;                  // same as reducing modulo 32768
}


//------------------------------------------------------------------------------------------
//
//   class RBBICharMonkey      Character (Grapheme Cluster) specific implementation
//                             of RBBIMonkeyKind.
1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------------------ 1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass RBBICharMonkey: public RBBIMonkeyKind { 1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBICharMonkey(); 1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual ~RBBICharMonkey(); 1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UVector *charClasses(); 1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual void setText(const UnicodeString &s); 1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual int32_t next(int32_t i); 1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector *fSets; 1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fCRLFSet; 1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fControlSet; 1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fExtendSet; 179454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius UnicodeSet *fRegionalIndicatorSet; 1795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fPrependSet; 1796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fSpacingSet; 1797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fLSet; 1798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fVSet; 1799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fTSet; 1800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fLVSet; 
1801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fLVTSet; 1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fHangulSet; 1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fAnySet; 1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *fText; 1806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRBBICharMonkey::RBBICharMonkey() { 1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fText = NULL; 1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fCRLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\r\\n]"), status); 1815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fControlSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Control}]"), status); 1816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Extend}]"), status); 181754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fRegionalIndicatorSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Regional_Indicator}]"), status); 1818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPrependSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Prepend}]"), status); 1819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSpacingSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = SpacingMark}]"), 
status); 1820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = L}]"), status); 1821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fVSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = V}]"), status); 1822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = T}]"), status); 1823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLVSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LV}]"), status); 1824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLVTSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LVT}]"), status); 1825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHangulSet = new UnicodeSet(); 1826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHangulSet->addAll(*fLSet); 1827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHangulSet->addAll(*fVSet); 1828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHangulSet->addAll(*fTSet); 1829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHangulSet->addAll(*fLVSet); 1830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHangulSet->addAll(*fLVTSet); 1831103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fAnySet = new UnicodeSet(0, 0x10ffff); 1832103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = new UVector(status); 1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fCRLFSet, status); 1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fControlSet, status); 1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fExtendSet, status); 183754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 
fSets->addElement(fRegionalIndicatorSet, status); 1838103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (!fPrependSet->isEmpty()) { 1839103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fSets->addElement(fPrependSet, status); 1840103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 1841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fSpacingSet, status); 1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fHangulSet, status); 1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fAnySet, status); 1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru deferredStatus = status; 1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBICharMonkey::setText(const UnicodeString &s) { 1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fText = &s; 1852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruint32_t RBBICharMonkey::next(int32_t prevPos) { 1857c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int p0, p1, p2, p3; // Indices of the significant code points around the 1858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // break position being tested. The candidate break 1859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // location is before p2. 
1860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1861c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int breakPos = -1; 1862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32 c0, c1, c2, c3; // The code points at p0, p1, p2 & p3. 1864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(deferredStatus)) { 1866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Previous break at end of string. return DONE. 1870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (prevPos >= fText->length()) { 1871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 1872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru p0 = p1 = p2 = p3 = prevPos; 1874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c3 = fText->char32At(prevPos); 1875c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c0 = c1 = c2 = 0; 1876c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1877c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Loop runs once per "significant" character position in the input text. 1878c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 1879c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Move all of the positions forward in the input string. 
1880c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru p0 = p1; c0 = c1; 1881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru p1 = p2; c1 = c2; 1882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru p2 = p3; c2 = c3; 1883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1884c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Advancd p3 by one codepoint 1885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru p3 = fText->moveIndex32(p3, 1); 1886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c3 = fText->char32At(p3); 1887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (p1 == p2) { 1889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Still warming up the loop. (won't work with zero length strings, but we don't care) 1890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1892c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (p2 == fText->length()) { 1893c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Reached end of string. Always a break position. 1894c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1895c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1896c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Rule GB3 CR x LF 1898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // No Extend or Format characters may appear between the CR and LF, 1899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // which requires the additional check for p2 immediately following p1. 
1900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c1==0x0D && c2==0x0A && p1==(p2-1)) { 1902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1903c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1904c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Rule (GB4). ( Control | CR | LF ) <break> 1906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fControlSet->contains(c1) || 1907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c1 == 0x0D || 1908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c1 == 0x0A) { 1909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Rule (GB5) <break> ( Control | CR | LF ) 1913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 1914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fControlSet->contains(c2) || 1915c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c2 == 0x0D || 1916c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c2 == 0x0A) { 1917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Rule (GB6) L x ( L | V | LV | LVT ) 1922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fLSet->contains(c1) && 1923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (fLSet->contains(c2) || 1924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fVSet->contains(c2) 
|| 1925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLVSet->contains(c2) || 1926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLVTSet->contains(c2))) { 1927c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1928c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1929c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1930c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Rule (GB7) ( LV | V ) x ( V | T ) 1931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((fLVSet->contains(c1) || fVSet->contains(c1)) && 1932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (fVSet->contains(c2) || fTSet->contains(c2))) { 1933c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1934c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1935c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Rule (GB8) ( LVT | T) x T 1937c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((fLVTSet->contains(c1) || fTSet->contains(c1)) && 1938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTSet->contains(c2)) { 1939c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1940c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1941c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 194254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // Rule (GB8a) Regional_Indicator x Regional_Indicator 194354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (fRegionalIndicatorSet->contains(c1) && fRegionalIndicatorSet->contains(c2)) { 194454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius continue; 194554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } 194654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 1947c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Rule (GB9) Numeric x ALetter 
1948c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fExtendSet->contains(c2)) { 1949c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1950c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Rule (GB9a) x SpacingMark 1953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fSpacingSet->contains(c2)) { 1954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Rule (GB9b) Prepend x 1958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fPrependSet->contains(c1)) { 1959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 1960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Rule (GB10) Any <break> Any 1963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 1964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru breakPos = p2; 1967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return breakPos; 1968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUVector *RBBICharMonkey::charClasses() { 1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fSets; 
1974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRBBICharMonkey::~RBBICharMonkey() { 1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSets; 1979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fCRLFSet; 1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fControlSet; 1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fExtendSet; 198254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius delete fRegionalIndicatorSet; 1983c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fPrependSet; 1984c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fSpacingSet; 1985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fLSet; 1986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fVSet; 1987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fTSet; 1988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fLVSet; 1989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fLVTSet; 1990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fHangulSet; 1991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fAnySet; 1992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------------------ 1995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// class RBBIWordMonkey Word Break specific implementation 1997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// of RBBIMonkeyKind. 
1998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------------------ 2000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass RBBIWordMonkey: public RBBIMonkeyKind { 2001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 2002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBIWordMonkey(); 2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual ~RBBIWordMonkey(); 2004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UVector *charClasses(); 2005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual void setText(const UnicodeString &s); 2006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual int32_t next(int32_t i); 2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 2008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector *fSets; 2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fCRSet; 2011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fLFSet; 2012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fNewlineSet; 2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fKatakanaSet; 2014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fALetterSet; 201554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // TODO(jungshik): Do we still need this change? 
201654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // UnicodeSet *fALetterSet; // matches ALetterPlus in word.txt 2017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fMidNumLetSet; 2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fMidLetterSet; 2019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fMidNumSet; 2020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fNumericSet; 2021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fFormatSet; 2022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fOtherSet; 2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fExtendSet; 2024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fExtendNumLetSet; 202554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius UnicodeSet *fRegionalIndicatorSet; 202654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius UnicodeSet *fDictionaryCjkSet; 2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *fMatcher; 2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *fText; 2031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 2032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRBBIWordMonkey::RBBIWordMonkey() 2035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 2037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = new UVector(status); 
2039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2040c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fCRSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = CR}]"), status); 2041c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLFSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = LF}]"), status); 2042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fNewlineSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Newline}]"), status); 204354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fDictionaryCjkSet= new UnicodeSet("[[\\uac00-\\ud7a3][:Han:][:Hiragana:][:Katakana:]]", status); 204454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // Exclude Hangul syllables from ALetterSet during testing. 204554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // Leave CJK dictionary characters out from the monkey tests! 204654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#if 0 204754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fALetterSet = new UnicodeSet("[\\p{Word_Break = ALetter}" 204854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "[\\p{Line_Break = Complex_Context}" 204954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "-\\p{Grapheme_Cluster_Break = Extend}" 205054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "-\\p{Grapheme_Cluster_Break = Control}" 205154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "]]", 205254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius status); 205354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#endif 205454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fALetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ALetter}]"), status); 205554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fALetterSet->removeAll(*fDictionaryCjkSet); 2056c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fKatakanaSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Katakana}]"), 
status); 2057c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMidNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNumLet}]"), status); 2058c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMidLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidLetter}]"), status); 2059c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMidNumSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = MidNum}]"), status); 206054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // TODO: this set used to contain [\\uff10-\\uff19] (fullwidth digits), but this breaks the test 206154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // we should figure out why 2062c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Numeric}]"), status); 2063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Format}]"), status); 2064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fExtendNumLetSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = ExtendNumLet}]"), status); 2065c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"), status); 206654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fRegionalIndicatorSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Regional_Indicator}]"), status); 2067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet = new UnicodeSet(); 2069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(status)) { 2070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru deferredStatus = status; 2071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru } 2073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->complement(); 2075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fOtherSet->removeAll(*fCRSet); 2076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fOtherSet->removeAll(*fLFSet); 2077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fOtherSet->removeAll(*fNewlineSet); 2078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fKatakanaSet); 2079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fALetterSet); 2080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fMidLetterSet); 2081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fMidNumSet); 2082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fNumericSet); 2083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fExtendNumLetSet); 2084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fFormatSet); 2085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fExtendSet); 208654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fOtherSet->removeAll(*fRegionalIndicatorSet); 2087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Inhibit dictionary characters from being tested at all. 
208854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fOtherSet->removeAll(*fDictionaryCjkSet); 2089c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fOtherSet->removeAll(UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{LineBreak = Complex_Context}]"), status)); 2090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fCRSet, status); 2092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fLFSet, status); 2093c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fNewlineSet, status); 2094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fALetterSet, status); 209554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius //fSets->addElement(fKatakanaSet, status); //TODO: work out how to test katakana 2096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fMidLetterSet, status); 2097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fMidNumLetSet, status); 2098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fMidNumSet, status); 2099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fNumericSet, status); 2100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fFormatSet, status); 2101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fExtendSet, status); 2102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fOtherSet, status); 2103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fExtendNumLetSet, status); 210454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fSets->addElement(fRegionalIndicatorSet, status); 2105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 
2107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru deferredStatus = status; 2108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBIWordMonkey::setText(const UnicodeString &s) { 2112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fText = &s; 2113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t RBBIWordMonkey::next(int32_t prevPos) { 2117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int p0, p1, p2, p3; // Indices of the significant code points around the 2118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // break position being tested. The candidate break 2119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // location is before p2. 2120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int breakPos = -1; 2122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c0, c1, c2, c3; // The code points at p0, p1, p2 & p3. 2124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(deferredStatus)) { 2126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 2127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Prev break at end of string. return DONE. 
2130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prevPos >= fText->length()) { 2131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 2132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p0 = p1 = p2 = p3 = prevPos; 2134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c3 = fText->char32At(prevPos); 2135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c0 = c1 = c2 = 0; 2136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop runs once per "significant" character position in the input text. 2138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 2139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Move all of the positions forward in the input string. 2140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p0 = p1; c0 = c1; 2141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p1 = p2; c1 = c2; 2142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p2 = p3; c2 = c3; 2143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Advancd p3 by X(Extend | Format)* Rule 4 2145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // But do not advance over Extend & Format following a new line. 
(Unicode 5.1 change) 2146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p3 = fText->moveIndex32(p3, 1); 2148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c3 = fText->char32At(p3); 2149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fCRSet->contains(c2) || fLFSet->contains(c2) || fNewlineSet->contains(c2)) { 2150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 2152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (fFormatSet->contains(c3) || fExtendSet->contains(c3)); 2154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (p1 == p2) { 2157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Still warming up the loop. (won't work with zero length strings, but we don't care) 2158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (p2 == fText->length()) { 2161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Reached end of string. Always a break position. 
2162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule (3) CR x LF 2166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No Extend or Format characters may appear between the CR and LF, 2167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // which requires the additional check for p2 immediately following p1. 2168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 2169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c1==0x0D && c2==0x0A) { 2170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Rule (3a) Break before and after newlines (including CR and LF) 2174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // 2175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fCRSet->contains(c1) || fLFSet->contains(c1) || fNewlineSet->contains(c1)) { 2176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 2178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fCRSet->contains(c2) || fLFSet->contains(c2) || fNewlineSet->contains(c2)) { 2179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 2180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru }; 2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule (5). 
ALetter x ALetter 2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fALetterSet->contains(c1) && 2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fALetterSet->contains(c2)) { 2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule (6) ALetter x (MidLetter | MidNumLet) ALetter 2189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 2190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( fALetterSet->contains(c1) && 2191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (fMidLetterSet->contains(c2) || fMidNumLetSet->contains(c2)) && 2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fALetterSet->contains(c3)) { 2193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule (7) ALetter (MidLetter | MidNumLet) x ALetter 2198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fALetterSet->contains(c0) && 2199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (fMidLetterSet->contains(c1) || fMidNumLetSet->contains(c1)) && 2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fALetterSet->contains(c2)) { 2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule (8) Numeric x Numeric 2205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if 
(fNumericSet->contains(c1) && 2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fNumericSet->contains(c2)) { 2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule (9) ALetter x Numeric 2211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fALetterSet->contains(c1) && 2212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fNumericSet->contains(c2)) { 2213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule (10) Numeric x ALetter 2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fNumericSet->contains(c1) && 2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fALetterSet->contains(c2)) { 2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule (11) Numeric (MidNum | MidNumLet) x Numeric 2223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fNumericSet->contains(c0) && 2224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (fMidNumSet->contains(c1) || fMidNumLetSet->contains(c1)) && 2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fNumericSet->contains(c2)) { 2226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru // Rule (12) Numeric x (MidNum | MidNumLet) Numeric 2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fNumericSet->contains(c1) && 2231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (fMidNumSet->contains(c2) || fMidNumLetSet->contains(c2)) && 2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fNumericSet->contains(c3)) { 2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule (13) Katakana x Katakana 2237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fKatakanaSet->contains(c1) && 2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fKatakanaSet->contains(c2)) { 2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule 13a 2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((fALetterSet->contains(c1) || fNumericSet->contains(c1) || 2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fKatakanaSet->contains(c1) || fExtendNumLetSet->contains(c1)) && 2245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fExtendNumLetSet->contains(c2)) { 2246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 224754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } 2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule 13b 2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fExtendNumLetSet->contains(c1) && 2251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (fALetterSet->contains(c2) || fNumericSet->contains(c2) 
|| 2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fKatakanaSet->contains(c2))) { 2253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 225454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } 225554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 225654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // Rule 13c 225754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (fRegionalIndicatorSet->contains(c1) && fRegionalIndicatorSet->contains(c2)) { 225854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius continue; 225954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } 2260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule 14. Break found here. 2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru breakPos = p2; 2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return breakPos; 2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUVector *RBBIWordMonkey::charClasses() { 2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fSets; 2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRBBIWordMonkey::~RBBIWordMonkey() { 2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSets; 2277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fCRSet; 
2278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fLFSet; 2279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fNewlineSet; 2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fKatakanaSet; 2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fALetterSet; 2282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fMidNumLetSet; 2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fMidLetterSet; 2284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fMidNumSet; 2285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fNumericSet; 2286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fFormatSet; 2287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fExtendSet; 2288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fExtendNumLetSet; 228954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius delete fRegionalIndicatorSet; 229054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius delete fDictionaryCjkSet; 2291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fOtherSet; 2292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------------------ 2298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// class RBBISentMonkey Sentence Break specific implementation 2300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// of RBBIMonkeyKind. 
2301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------------------ 2303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass RBBISentMonkey: public RBBIMonkeyKind { 2304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 2305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBISentMonkey(); 2306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual ~RBBISentMonkey(); 2307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UVector *charClasses(); 2308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual void setText(const UnicodeString &s); 2309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual int32_t next(int32_t i); 2310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 2311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int moveBack(int posFrom); 2312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int moveForward(int posFrom); 2313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 cAt(int pos); 2314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector *fSets; 2316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fSepSet; 2318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fFormatSet; 2319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fSpSet; 2320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fLowerSet; 2321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fUpperSet; 2322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fOLetterSet; 
2323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fNumericSet; 2324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fATermSet; 2325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *fSContinueSet; 2326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fSTermSet; 2327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fCloseSet; 2328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fOtherSet; 2329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fExtendSet; 2330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *fText; 2332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 2334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRBBISentMonkey::RBBISentMonkey() 2336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 2338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = new UVector(status); 2340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Separator Set Note: Beginning with Unicode 5.1, CR and LF were removed from the separator 2342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // set and made into character classes of their own. For the monkey impl, 2343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // they remain in SEP, since Sep always appears with CR and LF in the rules. 
2344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSepSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sep} \\u000a \\u000d]"), status); 2345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFormatSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Format}]"), status); 2346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSpSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sp}]"), status); 2347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLowerSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Lower}]"), status); 2348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fUpperSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Upper}]"), status); 2349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fOLetterSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = OLetter}]"), status); 2350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fNumericSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Numeric}]"), status); 2351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fATermSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = ATerm}]"), status); 2352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSContinueSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = SContinue}]"), status); 2353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSTermSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = STerm}]"), status); 2354c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fCloseSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Close}]"), status); 2355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Extend}]"), status); 2356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet = new UnicodeSet(); 
2357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(status)) { 2359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru deferredStatus = status; 2360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->complement(); 2364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fSepSet); 2365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fFormatSet); 2366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fSpSet); 2367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fLowerSet); 2368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fUpperSet); 2369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fOLetterSet); 2370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fNumericSet); 2371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fATermSet); 2372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fOtherSet->removeAll(*fSContinueSet); 2373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fSTermSet); 2374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fCloseSet); 2375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOtherSet->removeAll(*fExtendSet); 2376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fSepSet, status); 2378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fFormatSet, status); 
2379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fSpSet, status); 2380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fLowerSet, status); 2381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fUpperSet, status); 2382c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fOLetterSet, status); 2383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fNumericSet, status); 2384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fATermSet, status); 2385c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fSContinueSet, status); 2386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fSTermSet, status); 2387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fCloseSet, status); 2388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fOtherSet, status); 2389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSets->addElement(fExtendSet, status); 2390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 2392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru deferredStatus = status; 2393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBISentMonkey::setText(const UnicodeString &s) { 2399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fText = &s; 2400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
2401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUVector *RBBISentMonkey::charClasses() { 2403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fSets; 2404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// moveBack() Find the "significant" code point preceding the index i. 2408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Skips over ($Extend | $Format)* . 2409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint RBBISentMonkey::moveBack(int i) { 2411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i <= 0) { 2412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 2413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t j = i; 2416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru j = fText->moveIndex32(j, -1); 2418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = fText->char32At(j); 2419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (j>0 &&(fFormatSet->contains(c) || fExtendSet->contains(c))); 2421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return j; 2422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
//  moveForward()   Advance from index i by one code point, then keep advancing
//                  while the code point at the new index is Format or Extend.
//                  Returns the text length when i is already at or past the end.
//                  Past the end cAt() yields -1; the loop then stops, presuming
//                  neither set reports containing -1.
//
int RBBISentMonkey::moveForward(int i) {
    if (i>=fText->length()) {
        return fText->length();
    }
    UChar32   c;
    int32_t   j = i;
    do {
        j = fText->moveIndex32(j, 1);
        c = cAt(j);
    }
    while (fFormatSet->contains(c) || fExtendSet->contains(c));
    return j;
}

//  cAt()   Bounds-checked character access.  Returns the code point at pos,
//          or -1 when pos lies outside the text.
//
UChar32 RBBISentMonkey::cAt(int pos) {
    if (pos<0 || pos>=fText->length()) {
        return -1;
    } else {
        return fText->char32At(pos);
    }
}

//  next()   Find the first sentence break position following prevPos by applying
//           the rules below, in order, at each successive candidate position.
//           Returns -1 if construction failed (deferredStatus) or if prevPos is
//           already at or beyond the end of the text; otherwise returns the
//           index of the break.
//
int32_t RBBISentMonkey::next(int32_t prevPos) {
    int    p0, p1, p2, p3;    // Indices of the significant code points around the
                              //   break position being tested.  The candidate break
                              //   location is before p2.

    int     breakPos = -1;

    UChar32 c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
    UChar32 c;

    if (U_FAILURE(deferredStatus)) {
        return -1;
    }

    // Prev break at end of string.  return DONE.
    if (prevPos >= fText->length()) {
        return -1;
    }
    p0 = p1 = p2 = p3 = prevPos;
    c3 =  fText->char32At(prevPos);
    c0 = c1 = c2 = 0;

    // Loop runs once per "significant" character position in the input text.
    // "continue" means no break at the current candidate (try the next one);
    // "break" leaves the loop with p2 as the boundary.
    for (;;) {
        // Move all of the positions forward in the input string.
        p0 = p1;  c0 = c1;
        p1 = p2;  c1 = c2;
        p2 = p3;  c2 = c3;

        // Advance p3 by    X(Extend | Format)*   Rule 4
        p3 = moveForward(p3);
        c3 = cAt(p3);

        // Rule (3)  CR x LF
        //           The p2==(p1+1) test requires the LF to follow the CR directly.
        if (c1==0x0d && c2==0x0a && p2==(p1+1)) {
            continue;
        }

        // Rule (4).   Sep  <break>
        if (fSepSet->contains(c1)) {
            p2 = p1+1;   // Separators don't combine with Extend or Format.
            break;
        }

        if (p2 >= fText->length()) {
            // Reached end of string.  Always a break position.
            break;
        }

        if (p2 == prevPos) {
            // Still warming up the loop.  (won't work with zero length strings, but we don't care)
            continue;
        }

        // Rule (6).   ATerm x Numeric
        if (fATermSet->contains(c1) &&  fNumericSet->contains(c2))  {
            continue;
        }

        // Rule (7).  Upper ATerm  x  Upper
        if (fUpperSet->contains(c0) && fATermSet->contains(c1) && fUpperSet->contains(c2)) {
            continue;
        }

        // Rule (8)  ATerm Close* Sp*  x  (not (OLetter | Upper | Lower | Sep | STerm | ATerm))* Lower
        //           Note:  STerm | ATerm are added to the negated part of the expression by a
        //                  note to the Unicode 5.0 documents.
        //           Scan back from p1 over Sp*, then Close*, looking for the ATerm.
        int p8 = p1;
        while (fSpSet->contains(cAt(p8))) {
            p8 = moveBack(p8);
        }
        while (fCloseSet->contains(cAt(p8))) {
            p8 = moveBack(p8);
        }
        if (fATermSet->contains(cAt(p8))) {
            // Scan forward from p2 to the first character that stops the negated run
            // (or to the end of the text, where cAt() gives -1).
            p8=p2;
            for (;;) {
                c = cAt(p8);
                if (c==-1 || fOLetterSet->contains(c) || fUpperSet->contains(c) ||
                    fLowerSet->contains(c) || fSepSet->contains(c) ||
                    fATermSet->contains(c) || fSTermSet->contains(c))  {
                    break;
                }
                p8 = moveForward(p8);
            }
            if (fLowerSet->contains(cAt(p8)))  {
                continue;
            }
        }

        // Rule 8a   (STerm | ATerm) Close* Sp* x (SContinue | STerm | ATerm);
        if (fSContinueSet->contains(c2) || fSTermSet->contains(c2) || fATermSet->contains(c2)) {
            p8 = p1;
            while (fSpSet->contains(cAt(p8))) {
                p8 = moveBack(p8);
            }
            while (fCloseSet->contains(cAt(p8))) {
                p8 = moveBack(p8);
            }
            c = cAt(p8);
            if (fSTermSet->contains(c) || fATermSet->contains(c)) {
                continue;
            }
        }

        // Rule (9)  (STerm | ATerm) Close*  x  (Close | Sp | Sep | CR | LF)
        //           CR and LF need no separate test here; the constructor puts them in fSepSet.
        int p9 = p1;
        while (fCloseSet->contains(cAt(p9))) {
            p9 = moveBack(p9);
        }
        c = cAt(p9);
        if ((fSTermSet->contains(c) || fATermSet->contains(c))) {
            if (fCloseSet->contains(c2) || fSpSet->contains(c2) || fSepSet->contains(c2)) {
                continue;
            }
        }

        // Rule (10)  (STerm | ATerm) Close* Sp*  x  (Sp | Sep | CR | LF)
        int p10 = p1;
        while (fSpSet->contains(cAt(p10))) {
            p10 = moveBack(p10);
        }
        while (fCloseSet->contains(cAt(p10))) {
            p10 = moveBack(p10);
        }
        if (fSTermSet->contains(cAt(p10)) || fATermSet->contains(cAt(p10))) {
            if (fSpSet->contains(c2) || fSepSet->contains(c2)) {
                continue;
            }
        }

        // Rule (11)  (STerm | ATerm) Close* Sp* (Sep | CR | LF)?  <break>
        //            At most one trailing separator is stepped over before the Sp* / Close* scan.
        int p11 = p1;
        if (fSepSet->contains(cAt(p11))) {
            p11 = moveBack(p11);
        }
        while (fSpSet->contains(cAt(p11))) {
            p11 = moveBack(p11);
        }
        while (fCloseSet->contains(cAt(p11))) {
            p11 = moveBack(p11);
        }
        if (fSTermSet->contains(cAt(p11)) || fATermSet->contains(cAt(p11))) {
            break;
        }

        //  Rule (12)  Any x Any
        continue;
    }
    breakPos = p2;
    return breakPos;
}

RBBISentMonkey::~RBBISentMonkey() {
    delete fSets;
    delete fSepSet;
2604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fFormatSet; 2605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSpSet; 2606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fLowerSet; 2607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fUpperSet; 2608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fOLetterSet; 2609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fNumericSet; 2610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fATermSet; 2611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fSContinueSet; 2612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSTermSet; 2613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fCloseSet; 2614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fOtherSet; 2615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fExtendSet; 2616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------------------- 2621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// RBBILineMonkey 2623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------------------- 2625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass RBBILineMonkey: public RBBIMonkeyKind { 
2627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 2628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBILineMonkey(); 2629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual ~RBBILineMonkey(); 2630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UVector *charClasses(); 2631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual void setText(const UnicodeString &s); 2632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual int32_t next(int32_t i); 2633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual void rule9Adjust(int32_t pos, UChar32 *posChar, int32_t *nextPos, UChar32 *nextChar); 2634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 2635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector *fSets; 2636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fBK; 2638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fCR; 2639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fLF; 2640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fCM; 2641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fNL; 2642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fSG; 2643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fWJ; 2644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fZW; 2645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fGL; 2646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fCB; 2647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fSP; 2648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fB2; 2649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fBA; 
2650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fBB; 2651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fHY; 2652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fH2; 2653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fH3; 2654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fCL; 265550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet *fCP; 2656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fEX; 2657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fIN; 2658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fJL; 2659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fJV; 2660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fJT; 2661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fNS; 2662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fOP; 2663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fQU; 2664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fIS; 2665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fNU; 2666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fPO; 2667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fPR; 2668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fSY; 2669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fAI; 2670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fAL; 2671103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UnicodeSet *fCJ; 2672103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UnicodeSet *fHL; 2673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fID; 267454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius UnicodeSet 
*fRI; 2675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fSA; 2676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fXX; 2677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *fCharBI; 2679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *fText; 2681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *fOrigPositions; 2682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *fNumberMatcher; 2684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *fLB11Matcher; 2685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 2686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRBBILineMonkey::RBBILineMonkey() 2689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 2691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = new UVector(status); 2693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fBK = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_Break=BK}]"), status); 2695c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fCR = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CR}]"), status); 2696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLF = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=LF}]"), status); 2697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fCM = new 
UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CM}]"), status); 2698c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fNL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NL}]"), status); 2699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fWJ = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=WJ}]"), status); 2700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fZW = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ZW}]"), status); 2701c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fGL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=GL}]"), status); 2702c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fCB = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CB}]"), status); 2703c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SP}]"), status); 2704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fB2 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=B2}]"), status); 2705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fBA = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BA}]"), status); 2706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fBB = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=BB}]"), status); 2707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHY = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=HY}]"), status); 2708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fH2 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H2}]"), status); 2709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fH3 = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=H3}]"), status); 2710c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fCL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CL}]"), status); 271150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fCP = new 
UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CP}]"), status); 2712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fEX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=EX}]"), status); 2713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fIN = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IN}]"), status); 2714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fJL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JL}]"), status); 2715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fJV = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JV}]"), status); 2716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fJT = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=JT}]"), status); 2717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fNS = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NS}]"), status); 2718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fOP = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=OP}]"), status); 2719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fQU = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=QU}]"), status); 2720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fIS = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=IS}]"), status); 2721c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fNU = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=NU}]"), status); 2722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPO = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PO}]"), status); 2723c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPR = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=PR}]"), status); 2724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSY = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SY}]"), status); 2725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAI = new 
UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AI}]"), status); 2726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=AL}]"), status); 2727103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fCJ = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=CJ}]"), status); 2728103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fHL = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=HL}]"), status); 2729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fID = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=ID}]"), status); 273054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fRI = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=RI}]"), status); 2731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSA = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=SA}]"), status); 2732c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fSG = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\ud800-\\udfff]"), status); 2733c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fXX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=XX}]"), status); 2734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 2736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru deferredStatus = status; 2737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCharBI = NULL; 2738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fNumberMatcher = NULL; 2739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fAL->addAll(*fXX); // Default behavior for XX is identical to AL 2743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
fAL->addAll(*fAI); // Default behavior for AI is identical to AL 2744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fAL->addAll(*fSA); // Default behavior for SA is XX, which defaults to AL 2745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fAL->addAll(*fSG); // Default behavior for SG is identical to AL. 2746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2747103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fNS->addAll(*fCJ); // Default behavior for CJ is identical to NS. 2748103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 2749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fBK, status); 2750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fCR, status); 2751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fLF, status); 2752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fCM, status); 2753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fNL, status); 2754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fWJ, status); 2755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fZW, status); 2756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fGL, status); 2757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fCB, status); 2758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fSP, status); 2759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fB2, status); 2760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fBA, status); 2761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fBB, status); 2762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fHY, status); 2763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
fSets->addElement(fH2, status); 2764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fH3, status); 2765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fCL, status); 276650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fSets->addElement(fCP, status); 2767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fEX, status); 2768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fIN, status); 2769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fJL, status); 2770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fJT, status); 2771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fJV, status); 2772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fNS, status); 2773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fOP, status); 2774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fQU, status); 2775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fIS, status); 2776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fNU, status); 2777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fPO, status); 2778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fPR, status); 2779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fSY, status); 2780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fAI, status); 2781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fAL, status); 2782103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fSets->addElement(fHL, status); 2783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fID, status); 2784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru fSets->addElement(fWJ, status); 278554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fSets->addElement(fRI, status); 2786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fSA, status); 2787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(fSG, status); 2788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const char *rules = 2790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?" 2791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "((\\p{Line_Break=OP}|\\p{Line_Break=HY})\\p{Line_Break=CM}*)?" 2792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\p{Line_Break=NU}\\p{Line_Break=CM}*" 2793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "((\\p{Line_Break=NU}|\\p{Line_Break=IS}|\\p{Line_Break=SY})\\p{Line_Break=CM}*)*" 279450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "((\\p{Line_Break=CL}|\\p{Line_Break=CP})\\p{Line_Break=CM}*)?" 
2795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "((\\p{Line_Break=PR}|\\p{Line_Break=PO})\\p{Line_Break=CM}*)?"; 2796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fNumberMatcher = new RegexMatcher( 2798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString(rules, -1, US_INV), 0, status); 2799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCharBI = BreakIterator::createCharacterInstance(Locale::getEnglish(), status); 2801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 2803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru deferredStatus = status; 2804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBILineMonkey::setText(const UnicodeString &s) { 2809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fText = &s; 2810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCharBI->setText(s); 2811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fNumberMatcher->reset(s); 2812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// rule9Adjust 2816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Line Break TR rules 9 and 10 implementation. 
2817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// This deals with combining marks and other sequences that 2818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// that must be treated as if they were something other than what they actually are. 2819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// This is factored out into a separate function because it must be applied twice for 2821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// each potential break, once to the chars before the position being checked, then 2822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// again to the text following the possible break. 2823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBILineMonkey::rule9Adjust(int32_t pos, UChar32 *posChar, int32_t *nextPos, UChar32 *nextChar) { 2825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pos == -1) { 2826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Invalid initial position. Happens during the warmup iteration of the 2827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // main loop in next(). 2828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nPos = *nextPos; 2832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 9 Keep combining sequences together. 2834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // advance over any CM class chars. Note that Line Break CM is different 2835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // from the normal Grapheme Extend property. 
2836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!(fSP->contains(*posChar) || fBK->contains(*posChar) || *posChar==0x0d || 2837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *posChar==0x0a ||fNL->contains(*posChar) || fZW->contains(*posChar))) { 2838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 2839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *nextChar = fText->char32At(nPos); 2840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!fCM->contains(*nextChar)) { 2841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nPos = fText->moveIndex32(nPos, 1); 2844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 9 Treat X CM* as if it were x. 2849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No explicit action required. 2850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 10 Treat any remaining combining mark as AL 2852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fCM->contains(*posChar)) { 2853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *posChar = 0x41; // thisChar = 'A'; 2854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Push the updated nextPos and nextChar back to our caller. 
2857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This only makes a difference if posChar got bigger by consuming a 2858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // combining sequence. 2859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *nextPos = nPos; 2860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *nextChar = fText->char32At(nPos); 2861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t RBBILineMonkey::next(int32_t startPos) { 2866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 2867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t pos; // Index of the char following a potential break position 2868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 thisChar; // Character at above position "pos" 2869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prevPos; // Index of the char preceding a potential break position 2871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 prevChar; // Character at above position. Note that prevChar 2872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and thisChar may not be adjacent because combining 2873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // characters between them will be ignored. 2874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2875103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t prevPosX2; // Second previous character. Wider context for LB21a. 
2876103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 prevCharX2; 2877103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 2878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nextPos; // Index of the next character following pos. 2879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Usually skips over combining marks. 2880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nextCPPos; // Index of the code point following "pos." 2881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // May point to a combining mark. 2882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t tPos; // temp value. 2883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2885c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(deferredStatus)) { 2886c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 2887c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (startPos >= fText->length()) { 2890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 2891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Initial values for loop. Loop will run the first time without finding breaks, 2895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // while the invalid values shift out and the "this" and 2896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "prev" positions are filled in with good values. 
2897103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius pos = prevPos = prevPosX2 = -1; // Invalid value, serves as flag for initial loop iteration. 2898103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius thisChar = prevChar = prevCharX2 = 0; 2899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextPos = nextCPPos = startPos; 2900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop runs once per position in the test text, until a break position 2903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // is found. 2904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 2905103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius prevPosX2 = prevPos; 2906103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius prevCharX2 = prevChar; 2907103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 2908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevPos = pos; 2909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevChar = thisChar; 2910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = nextPos; 2912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru thisChar = fText->char32At(pos); 2913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextCPPos = fText->moveIndex32(pos, 1); 2915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextPos = nextCPPos; 2916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule LB2 - Break at end of text. 
2918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pos >= fText->length()) { 2919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule LB 9 - adjust for combining sequences. 2923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We do this one out-of-order because the adjustment does not change anything 2924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // that would match rules LB 3 - LB 6, but after the adjustment, LB 3-6 do need to 2925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // be applied. 2926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rule9Adjust(prevPos, &prevChar, &pos, &thisChar); 2927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextCPPos = nextPos = fText->moveIndex32(pos, 1); 2928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = fText->char32At(nextPos); 2929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rule9Adjust(pos, &thisChar, &nextPos, &c); 2930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the loop is still warming up - if we haven't shifted the initial 2932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // -1 positions out of prevPos yet - loop back to advance the 2933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // position in the input without any further looking for breaks. 
2934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prevPos == -1) { 2935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 4 Always break after hard line breaks, 2939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fBK->contains(prevChar)) { 2940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 5 Break after CR, LF, NL, but not inside CR LF 2944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prevChar == 0x0d && thisChar == 0x0a) { 2945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prevChar == 0x0d || 2948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevChar == 0x0a || 2949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevChar == 0x85) { 2950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 6 Don't break before hard line breaks 2954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (thisChar == 0x0d || thisChar == 0x0a || thisChar == 0x85 || 2955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fBK->contains(thisChar)) { 2956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 7 Don't break before spaces or zero-width space. 2961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fSP->contains(thisChar)) { 2962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fZW->contains(thisChar)) { 2966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 8 Break after zero width space 2970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fZW->contains(prevChar)) { 2971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 9, 10 Already done, at top of loop. 2975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 2976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 11 Do not break before or after WORD JOINER and related characters. 
2979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // x WJ 2980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // WJ x 2981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 2982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fWJ->contains(thisChar) || fWJ->contains(prevChar)) { 2983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 12 2987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // GL x 2988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fGL->contains(prevChar)) { 2989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 2990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // LB 12a 2993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // [^SP BA HY] x GL 2994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!(fSP->contains(prevChar) || 2995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fBA->contains(prevChar) || 2996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHY->contains(prevChar) ) && fGL->contains(thisChar)) { 2997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 13 Don't break before closings. 
300350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // NU x CL, NU x CP and NU x IS are not matched here so that they will 3004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // fall into LB 17 and the more general number regular expression. 3005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 300627f654740f2a26ad62a5c155af9199af9e69b889claireho if ((!fNU->contains(prevChar) && fCL->contains(thisChar)) || 300727f654740f2a26ad62a5c155af9199af9e69b889claireho (!fNU->contains(prevChar) && fCP->contains(thisChar)) || 300827f654740f2a26ad62a5c155af9199af9e69b889claireho fEX->contains(thisChar) || 300927f654740f2a26ad62a5c155af9199af9e69b889claireho (!fNU->contains(prevChar) && fIS->contains(thisChar)) || 301027f654740f2a26ad62a5c155af9199af9e69b889claireho (!fNU->contains(prevChar) && fSY->contains(thisChar))) { 3011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 14 Don't break after OP SP* 3015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scan backwards, checking for this sequence. 3016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The OP char could include combining marks, so we actually check for 3017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // OP CM* SP* 3018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Another Twist: The Rule 67 fixes may have changed a SP CM 3019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // sequence into a ID char, so before scanning back through spaces, 3020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // verify that prevChar is indeed a space. 
The prevChar variable 3021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // may differ from fText[prevPos] 3022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tPos = prevPos; 3023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fSP->contains(prevChar)) { 3024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tPos > 0 && fSP->contains(fText->char32At(tPos))) { 3025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tPos=fText->moveIndex32(tPos, -1); 3026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tPos > 0 && fCM->contains(fText->char32At(tPos))) { 3029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tPos=fText->moveIndex32(tPos, -1); 3030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fOP->contains(fText->char32At(tPos))) { 3032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 15 QU SP* x OP 3037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fOP->contains(thisChar)) { 3038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scan backwards from prevChar to see if it is preceded by QU CM* SP* 3039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int tPos = prevPos; 3040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tPos>0 && fSP->contains(fText->char32At(tPos))) { 3041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tPos = fText->moveIndex32(tPos, -1); 3042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
3043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tPos>0 && fCM->contains(fText->char32At(tPos))) { 3044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tPos = fText->moveIndex32(tPos, -1); 3045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fQU->contains(fText->char32At(tPos))) { 3047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 305350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // LB 16 (CL | CP) SP* x NS 305450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Scan backwards for SP* CM* (CL | CP) 3055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fNS->contains(thisChar)) { 3056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int tPos = prevPos; 3057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tPos>0 && fSP->contains(fText->char32At(tPos))) { 3058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tPos = fText->moveIndex32(tPos, -1); 3059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tPos>0 && fCM->contains(fText->char32At(tPos))) { 3061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tPos = fText->moveIndex32(tPos, -1); 3062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 306350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fCL->contains(fText->char32At(tPos)) || fCP->contains(fText->char32At(tPos))) { 3064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
3066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 17 B2 SP* x B2 3070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fB2->contains(thisChar)) { 3071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scan backwards, checking for the B2 CM* SP* sequence. 3072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tPos = prevPos; 3073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fSP->contains(prevChar)) { 3074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tPos > 0 && fSP->contains(fText->char32At(tPos))) { 3075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tPos=fText->moveIndex32(tPos, -1); 3076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tPos > 0 && fCM->contains(fText->char32At(tPos))) { 3079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tPos=fText->moveIndex32(tPos, -1); 3080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fB2->contains(fText->char32At(tPos))) { 3082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 18 break after space 3088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fSP->contains(prevChar)) { 3089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru break; 3090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 19 3093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // x QU 3094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // QU x 3095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fQU->contains(thisChar) || fQU->contains(prevChar)) { 3096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 20 Break around a CB 3100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fCB->contains(thisChar) || fCB->contains(prevChar)) { 3101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 21 3105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fBA->contains(thisChar) || 3106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fHY->contains(thisChar) || 3107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fNS->contains(thisChar) || 3108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fBB->contains(prevChar) ) { 3109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3112103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // LB 21a 3113103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // HL (HY | BA) x 3114103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (fHL->contains(prevCharX2) && 
3115103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius (fHY->contains(prevChar) || fBA->contains(prevChar))) { 3116103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius continue; 3117103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 3118103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 22 312027f654740f2a26ad62a5c155af9199af9e69b889claireho if ((fAL->contains(prevChar) && fIN->contains(thisChar)) || 3121103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius (fHL->contains(prevChar) && fIN->contains(thisChar)) || 312227f654740f2a26ad62a5c155af9199af9e69b889claireho (fID->contains(prevChar) && fIN->contains(thisChar)) || 312327f654740f2a26ad62a5c155af9199af9e69b889claireho (fIN->contains(prevChar) && fIN->contains(thisChar)) || 312427f654740f2a26ad62a5c155af9199af9e69b889claireho (fNU->contains(prevChar) && fIN->contains(thisChar)) ) { 3125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 23 ID x PO 3130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // AL x NU 3131103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // HL x NU 3132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NU x AL 313327f654740f2a26ad62a5c155af9199af9e69b889claireho if ((fID->contains(prevChar) && fPO->contains(thisChar)) || 313427f654740f2a26ad62a5c155af9199af9e69b889claireho (fAL->contains(prevChar) && fNU->contains(thisChar)) || 3135103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius (fHL->contains(prevChar) && fNU->contains(thisChar)) || 3136103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius (fNU->contains(prevChar) && fAL->contains(thisChar)) || 
3137103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius (fNU->contains(prevChar) && fHL->contains(thisChar)) ) { 3138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 24 Do not break between prefix and letters or ideographs. 3142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // PR x ID 3143103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // PR x (AL | HL) 3144103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // PO x (AL | HL) 314527f654740f2a26ad62a5c155af9199af9e69b889claireho if ((fPR->contains(prevChar) && fID->contains(thisChar)) || 3146103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius (fPR->contains(prevChar) && (fAL->contains(thisChar) || fHL->contains(thisChar))) || 3147103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius (fPO->contains(prevChar) && (fAL->contains(thisChar) || fHL->contains(thisChar)))) { 3148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 25 Numbers 3154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fNumberMatcher->lookingAt(prevPos, status)) { 3155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 3156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Matched a number. 
But could have been just a single digit, which would 3159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // not represent a "no break here" between prevChar and thisChar 3160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numEndIdx = fNumberMatcher->end(status); // idx of first char following num 3161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (numEndIdx > pos) { 3162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Number match includes at least our two chars being checked 3163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (numEndIdx > nextPos) { 3164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Number match includes additional chars. Update pos and nextPos 3165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // so that next loop iteration will continue at the end of the number, 3166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // checking for breaks between last char in number & whatever follows. 3167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = nextPos = numEndIdx; 3168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 3169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = fText->moveIndex32(pos, -1); 3170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru thisChar = fText->char32At(pos); 3171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (fCM->contains(thisChar)); 3172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 26 Do not break a Korean syllable. 
3179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fJL->contains(prevChar) && (fJL->contains(thisChar) || 3180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fJV->contains(thisChar) || 3181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fH2->contains(thisChar) || 3182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fH3->contains(thisChar))) { 3183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((fJV->contains(prevChar) || fH2->contains(prevChar)) && 3187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (fJV->contains(thisChar) || fJT->contains(thisChar))) { 3188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((fJT->contains(prevChar) || fH3->contains(prevChar)) && 3192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fJT->contains(thisChar)) { 3193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 27 Treat a Korean Syllable Block the same as ID. 
3197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((fJL->contains(prevChar) || fJV->contains(prevChar) || 3198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fJT->contains(prevChar) || fH2->contains(prevChar) || fH3->contains(prevChar)) && 3199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fIN->contains(thisChar)) { 3200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((fJL->contains(prevChar) || fJV->contains(prevChar) || 3203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fJT->contains(prevChar) || fH2->contains(prevChar) || fH3->contains(prevChar)) && 3204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPO->contains(thisChar)) { 3205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fPR->contains(prevChar) && (fJL->contains(thisChar) || fJV->contains(thisChar) || 3208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fJT->contains(thisChar) || fH2->contains(thisChar) || fH3->contains(thisChar))) { 3209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // LB 28 Do not break between alphabetics ("at"). 
3215103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if ((fAL->contains(prevChar) || fHL->contains(prevChar)) && (fAL->contains(thisChar) || fHL->contains(thisChar))) { 3216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 29 Do not break between numeric punctuation and alphabetics ("e.g."). 3220103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (fIS->contains(prevChar) && (fAL->contains(thisChar) || fHL->contains(thisChar))) { 3221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 322450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // LB 30 Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation. 322550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (AL | NU) x OP 322650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // CP x (AL | NU) 3227103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if ((fAL->contains(prevChar) || fHL->contains(prevChar) || fNU->contains(prevChar)) && fOP->contains(thisChar)) { 322850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 322950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3230103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (fCP->contains(prevChar) && (fAL->contains(thisChar) || fHL->contains(thisChar) || fNU->contains(thisChar))) { 323150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 323250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 323350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 323454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // LB30a Do not break between regional indicators. 
323554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // RI x RI 323654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (fRI->contains(prevChar) && fRI->contains(thisChar)) { 323754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius continue; 323854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } 323954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 3240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LB 31 Break everywhere else 3241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return pos; 3246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUVector *RBBILineMonkey::charClasses() { 3250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fSets; 3251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRBBILineMonkey::~RBBILineMonkey() { 3255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSets; 3256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fBK; 3258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fCR; 3259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fLF; 3260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fCM; 3261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
delete fNL; 3262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fWJ; 3263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fZW; 3264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fGL; 3265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fCB; 3266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSP; 3267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fB2; 3268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fBA; 3269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fBB; 3270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fHY; 3271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fH2; 3272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fH3; 3273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fCL; 327450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fCP; 3275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fEX; 3276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fIN; 3277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fJL; 3278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fJV; 3279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fJT; 3280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fNS; 3281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fOP; 3282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fQU; 3283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fIS; 3284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fNU; 3285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fPO; 3286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fPR; 3287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSY; 
3288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fAI; 3289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fAL; 3290103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius delete fCJ; 3291103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius delete fHL; 3292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fID; 329354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius delete fRI; 3294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSA; 3295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSG; 3296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fXX; 3297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fCharBI; 3299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fNumberMatcher; 3300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------------------- 3304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 3305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// TestMonkey 3306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 3307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// params 3308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// seed=nnnnn Random number starting seed. 3309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Setting the seed allows errors to be reproduced. 3310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// loop=nnn Looping count. Controls running time. 3311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// -1: run forever. 
3312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 0 or greater: run length. 3313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 3314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// type = char | word | line | sent | title 3315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 3316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------------------- 3317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t getIntParam(UnicodeString name, UnicodeString ¶ms, int32_t defaultVal) { 3319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t val = defaultVal; 3320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name.append(" *= *(-?\\d+)"); 3321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 3322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m(name, params, 0, status); 3323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (m.find()) { 3324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The param exists. Convert the string to an int. 
3325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char valString[100]; 3326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t paramLength = m.end(1, status) - m.start(1, status); 3327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (paramLength >= (int32_t)(sizeof(valString)-1)) { 3328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru paramLength = (int32_t)(sizeof(valString)-2); 3329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru params.extract(m.start(1, status), paramLength, valString, sizeof(valString)); 3331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru val = strtol(valString, NULL, 10); 3332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delete this parameter from the params string. 3334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(); 3335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru params = m.replaceFirst("", status); 3336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(U_SUCCESS(status)); 3338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return val; 3339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 3341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 334254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#if !UCONFIG_NO_REGULAR_EXPRESSIONS 3343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void testBreakBoundPreceding(RBBITest *test, UnicodeString ustr, 3344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *bi, 3345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int expected[], 3346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int 
expectedcount) 3347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 3348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int count = 0; 3349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i = 0; 3350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int forward[50]; 3351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi->setText(ustr); 3352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) { 3353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru forward[count] = i; 3354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (count < expectedcount && expected[count] != i) { 3355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test->errln("break forward test failed: expected %d but got %d", 3356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected[count], i); 3357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count ++; 3360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (count != expectedcount) { 3362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printStringBreaks(ustr, expected, expectedcount); 3363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test->errln("break forward test failed: missed %d match", 3364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedcount - count); 3365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // testing boundaries 3368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 1; i < expectedcount; i ++) { 
3369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int j = expected[i - 1]; 3370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!bi->isBoundary(j)) { 3371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printStringBreaks(ustr, expected, expectedcount); 3372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test->errln("isBoundary() failed. Expected boundary at position %d", j); 3373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (j = expected[i - 1] + 1; j < expected[i]; j ++) { 3376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bi->isBoundary(j)) { 3377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printStringBreaks(ustr, expected, expectedcount); 3378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test->errln("isBoundary() failed. Not expecting boundary at position %d", j); 3379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) { 3385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count --; 3386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (forward[count] != i) { 338754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius printStringBreaks(ustr, expected, expectedcount); 3388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test->errln("happy break test previous() failed: expected %d but got %d", 3389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru forward[count], i); 
3390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (count != 0) { 3394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printStringBreaks(ustr, expected, expectedcount); 3395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test->errln("break test previous() failed: missed a match"); 3396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // testing preceding 3400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < expectedcount - 1; i ++) { 3401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // int j = expected[i] + 1; 3402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int j = ustr.moveIndex32(expected[i], 1); 3403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (; j <= expected[i + 1]; j ++) { 3404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bi->preceding(j) != expected[i]) { 3405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printStringBreaks(ustr, expected, expectedcount); 3406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test->errln("preceding(): Not expecting boundary at position %d", j); 3407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 341254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#endif 
3413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestWordBreaks(void) 3415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 3416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 3417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Locale locale("en"); 3419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 3420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status); 3421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *bi = BreakIterator::createWordInstance(locale, status); 342254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // Replaced any C+J characters in a row with a random sequence of characters 342354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius // of the same length to make our C+J segmentation not get in the way. 
3424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *strlist[] = 3425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 3426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U000e0032\\u0097\\u0f94\\uc2d8\\u05f4\\U000e0031\\u060d", 342754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "\\U000e0037\\u2666\\u1202\\u003a\\U000e0031\\u064d\\u0bea\\u091c\\U000e0040\\u003b", 3428c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\u0589\\u3e99\\U0001d7f3\\U000e0074\\u1810\\u200e\\U000e004b\\u0027\\U000e0061\\u003a", 3429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u398c\\U000104a5\\U0001d173\\u102d\\u002e\\uca3b\\u002e\\u002c\\u5622", 343054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "\\uac00\\u3588\\u009c\\u0953\\u194b", 3431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e", 3432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0602\\u2019\\ua191\\U000e0063\\u0a4c\\u003a\\ub4b5\\u003a\\u827f\\u002e", 343354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "\\u2f1f\\u1634\\u05f8\\u0944\\u04f2\\u0cdf\\u1f9c\\u05f4\\u002e", 3434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044", 3435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u003b\\u024a\\u102e\\U000e0071\\u0600", 3436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2027\\U000e0067\\u0a47\\u00b7", 3437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0", 3438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027", 3439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0589\\U000e006e\\u0a42\\U000104a5", 344054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "\\u0f66\\u2523\\u003a\\u0cae\\U000e0047\\u003a", 
3441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7", 3442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0027\\u11af\\U000e0057\\u0602", 3443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d7f2\\U000e007\\u0004\\u0589", 3444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b", 3445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d7f2\\U000e007d\\u0004\\u0589", 3446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d", 3447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959", 3448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U000e0065\\u302c\\uc986\\u09ee\\U000e0068", 3449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7", 3450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0233\\U000e0020\\u0a69\\u0d6a", 3451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019", 345254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "\\u18f4\\U000e0049\\u20e7\\u2027", 3453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ub315\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe", 3454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ua183\\u102d\\u0bec\\u003a", 3455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u17e8\\u06e7\\u002e\\u096d\\u003b", 3456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u003a\\u0e57\\u0fad\\u002e", 3457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de", 3458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a", 
3459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U000e005d\\u2044\\u0731\\u0650\\u0061", 3460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u003a\\u0664\\u00b7\\u1fba", 3461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u003b\\u0027\\u00b7\\u47a3", 346254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "\\u2027\\U000e0067\\u0a42\\u00b7\\u4edf\\uc26c\\u003a\\u4186\\u041b", 3463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0027\\u003a\\U0001d70f\\U0001d7df\\ubf4a\\U0001d7f5\\U0001d177\\u003a\\u0e51\\u1058\\U000e0058\\u00b7\\u0673", 3464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uc30d\\u002e\\U000e002c\\u0c48\\u003a\\ub5a1\\u0661\\u002c", 3465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 3466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int loop; 3467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 34686d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); 3469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) { 3472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // printf("looping %d\n", loop); 3473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString ustr = CharsToUnicodeString(strlist[loop]); 3474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RBBICharMonkey monkey; 3475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBIWordMonkey monkey; 3476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int expected[50]; 3478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int 
expectedcount = 0; 3479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru monkey.setText(ustr); 3481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 3482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) { 3483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected[expectedcount ++] = i; 3484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testBreakBoundPreceding(this, ustr, bi, expected, expectedcount); 3487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 3489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 3490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestWordBoundary(void) 3493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 3494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // <data><>\u1d4a\u206e<?>\u0603\U0001d7ff<>\u2019<></data> 3495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Locale locale("en"); 3496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 3497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status); 3498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *bi = BreakIterator::createWordInstance(locale, status); 3499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar str[50]; 3500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *strlist[] = 
3501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 3502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e", 3503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044", 3504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u003b\\u024a\\u102e\\U000e0071\\u0600", 3505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2027\\U000e0067\\u0a47\\u00b7", 3506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1fcd\\u002c\\u07aa\\u0027\\u11b0", 3507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u002c\\U000e003c\\U0001d7f4\\u003a\\u0c6f\\u0027", 3508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0589\\U000e006e\\u0a42\\U000104a5", 3509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u4f66\\ub523\\u003a\\uacae\\U000e0047\\u003a", 3510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u003a\\u0f21\\u0668\\u0dab\\u003a\\u0655\\u00b7", 3511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0027\\u11af\\U000e0057\\u0602", 3512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d7f2\\U000e007\\u0004\\u0589", 3513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U000e0022\\u003a\\u10b3\\u003a\\ua21b\\u002e\\U000e0058\\u1732\\U000e002b", 3514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d7f2\\U000e007d\\u0004\\u0589", 3515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u82ab\\u17e8\\u0736\\u2019\\U0001d64d", 3516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0e01\\ub55c\\u0a68\\U000e0037\\u0cd6\\u002c\\ub959", 351754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "\\U000e0065\\u302c\\u09ee\\U000e0068", 3518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0be8\\u002e\\u0c68\\u066e\\u136d\\ufc99\\u59e7", 
3519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0233\\U000e0020\\u0a69\\u0d6a", 3520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u206f\\u0741\\ub3ab\\u2019\\ubcac\\u2019", 3521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u58f4\\U000e0049\\u20e7\\u2027", 352254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "\\U0001d7e5\\U000e0073\\u0c47\\u06f2\\u0c6a\\u0037\\u10fe", 3523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ua183\\u102d\\u0bec\\u003a", 3524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u17e8\\u06e7\\u002e\\u096d\\u003b", 3525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u003a\\u0e57\\u0fad\\u002e", 3526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u002e\\U000e004c\\U0001d7ea\\u05bb\\ud0fd\\u02de", 3527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u32e6\\U0001d7f6\\u0fa1\\u206a\\U000e003c\\u0cec\\u003a", 3528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ua2a5\\u0038\\u2044\\u002e\\u0c67\\U000e003c\\u05f4\\u2027\\u05f4\\u2019", 3529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u003a\\u0664\\u00b7\\u1fba", 3530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u003b\\u0027\\u00b7\\u47a3", 3531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 3532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int loop; 3533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 35346d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); 3535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) { 
3538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // printf("looping %d\n", loop); 3539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_unescape(strlist[loop], str, 20); 3540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString ustr(str); 3541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int forward[50]; 3542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int count = 0; 3543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi->setText(ustr); 3545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int prev = 0; 3546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 3547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) { 3548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru forward[count ++] = i; 3549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i > prev) { 3550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int j; 3551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (j = prev + 1; j < i; j ++) { 3552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bi->isBoundary(j)) { 3553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printStringBreaks(ustr, forward, count); 3554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("happy boundary test failed: expected %d not a boundary", 3555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru j); 3556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!bi->isBoundary(i)) { 
3561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printStringBreaks(ustr, forward, count); 3562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("happy boundary test failed: expected %d a boundary", 3563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i); 3564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev = i; 3567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 3570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestLineBreaks(void) 3573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 3574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 3575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Locale locale("en"); 3576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 3577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *bi = BreakIterator::createLineInstance(locale, status); 3578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const int32_t STRSIZE = 50; 3579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar str[STRSIZE]; 3580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *strlist[] = 3581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 3582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u300f\\ufdfc\\ub798\\u2011\\u2011\\u0020\\u0b43\\u002d\\ubeec\\ufffc", 3583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
"\\u24ba\\u2060\\u3405\\ub290\\u000d\\U000e0032\\ufe35\\u00a0\\u0361\\" 3584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "U000112ed\\u0f0c\\u000a\\u308e\\ua875\\u0085\\u114d", 3585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ufffc\\u3063\\u2e08\\u30e3\\u000d\\u002d\\u0ed8\\u002f\\U00011a57\\" 3586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "u2014\\U000e0105\\u118c\\u000a\\u07f8", 3587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0668\\u192b\\u002f\\u2034\\ufe39\\u00b4\\u0cc8\\u2571\\u200b\\u003f", 3588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ufeff\\ufffc\\u3289\\u0085\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123", 3589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ufe3c\\u201c\\u000d\\u2025\\u2007\\u201c\\u002d\\u20a0\\u002d\\u30a7\\u17a4", 3590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2772\\u0020\\U000e010a\\u0020\\u2025\\u000a\\U000e0123", 3591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u002d\\uff1b\\u02c8\\u2029\\ufeff\\u0f22\\u2044\\ufe09\\u003a\\u096d\\u2009\\u000a\\u06f7\\u02cc\\u1019\\u2060", 3592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1781\\u0b68\\u0f0c\\u3010\\u0085\\U00011f7a\\u0020\\u0dd6\\u200b\\U000e007a\\u000a\\u2060\\u2026\\u002f\\u2026\\u24dc\\u101e\\u2014\\u2007\\u30a5", 3593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2770\\u0020\\U000e010f\\u0020\\u2060\\u000a\\u02cc\\u0bcc\\u060d\\u30e7\\u0f3b\\u002f", 3594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ufeff\\u0028\\u003b\\U00012fec\\u2010\\u0020\\u0004\\u200b\\u0020\\u275c\\u002f\\u17b1", 3595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u20a9\\u2014\\u00a2\\u31f1\\u002f\\u0020\\u05b8\\u200b\\u0cc2\\u003b\\u060d\\u02c8\\ua4e8\\u002f\\u17d5", 3596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
"\\u002d\\u136f\\uff63\\u0084\\ua933\\u2028\\u002d\\u431b\\u200b\\u20b0", 3597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uade3\\u11d6\\u000a\\U0001107d\\u203a\\u201d\\ub070\\u000d\\u2024\\ufffc", 3598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uff5b\\u101c\\u1806\\u002f\\u2213\\uff5f", 3599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2014\\u0a83\\ufdfc\\u003f\\u00a0\\u0020\\u000a\\u2991\\U0001d179\\u0020\\u201d\\U000125f6\\u0a67\\u20a7\\ufeff\\u043f", 3600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u169b\\U000e0130\\u002d\\u1041\\u0f3d\\u0abf\\u00b0\\u31fb\\u00a0\\u002d\\u02c8\\u003b", 3601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2762\\u1680\\u002d\\u2028\\u0027\\u01dc\\ufe56\\u003a\\u000a\\uffe6\\u29fd\\u0020\\u30ee\\u007c\\U0001d178\\u0af1\\u0085", 3602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u3010\\u200b\\u2029\\ufeff\\ufe6a\\u275b\\U000e013b\\ufe37\\u24d4\\u002d\\u1806\\u256a\\u1806\\u247c\\u0085\\u17ac", 3603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u99ab\\u0027\\u003b\\u2026\\ueaf0\\u0020\\u0020\\u0313\\u0020\\u3099\\uff09\\u208e\\u2011\\u2007\\u2060\\u000a\\u0020\\u0020\\u300b\\u0bf9", 3604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1806\\u060d\\u30f5\\u00b4\\u17e9\\u2544\\u2028\\u2024\\u2011\\u20a3\\u002d\\u09cc\\u1782\\u000d\\uff6f\\u0025", 3605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u002f\\uf22e\\u1944\\ufe3d\\u0020\\u206f\\u31b3\\u2014\\u002d\\u2025\\u0f0c\\u0085\\u2763", 3606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u002f\\u2563\\u202f\\u0085\\u17d5\\u200b\\u0020\\U000e0043\\u2014\\u058a\\u3d0a\\ufe57\\u2035\\u2028\\u2029", 3607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u20ae\\U0001d169\\u9293\\uff1f\\uff1f\\u0021\\u2012\\u2039\\u0085\\u02cc\\u00a2\\u0020\\U000e01ab\\u3085\\u0f3a\\u1806\\u0f0c\\u1945\\u000a\\U0001d7e7", 
3608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uffe6\\u00a0\\u200b\\u0085\\u2116\\u255b\\U0001d7f7\\u178c\\ufffc", 3609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u02cc\\ufe6a\\u00a0\\u0021\\u002d\\u7490\\uec2e\\u200b\\u000a", 3610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uec2e\\u200b\\u000a\\u0020\\u2028\\u2014\\u8945", 3611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u7490\\uec2e\\u200b\\u000a\\u0020\\u2028\\u2014", 3612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0020\\u2028\\u2014\\u8945\\u002c\\u005b", 3613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u000a\\ufe3c\\u201c\\u000d\\u2025\\u2007\\u201c\\u002d\\u20a0", 3614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2473\\u0e9d\\u0020\\u0085\\u000a\\ufe3c\\u201c\\u000d\\u2025", 3615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d16e\\ufffc\\u2025\\u0021\\u002d", 3616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ufffc\\u301b\\u0fa5\\U000e0103\\u2060\\u208e\\u17d5\\u034f\\u1009\\u003a\\u180e\\u2009\\u3111", 3617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2014\\u0020\\u000a\\u17c5\\u24fc", 3618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ufffc\\u0020\\u2116\\uff6c\\u200b\\u0ac3\\U0001028f", 3619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uaeb0\\u0344\\u0085\\ufffc\\u073b\\u2010", 3620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ufeff\\u0589\\u0085\\u0eb8\\u30fd\\u002f\\u003a\\u2014\\ufe43", 3621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u09cc\\u256a\\u276d\\u002d\\u3085\\u000d\\u0e05\\u2028\\u0fbb", 3622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2034\\u00bb\\u0ae6\\u300c\\u0020\\u31f8\\ufffc", 3623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2116\\u0ed2\\uff64\\u02cd\\u2001\\u2060", 
3624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u809d\\u2e02\\u0f0a\\uc48f\\u2540\\u000d\\u0cef\\u003a\\u0e4d" 3625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U000e0172\\U000e005c\\u17cf\\U00010ca6\\ufeff\\uf621\\u06f3\\uffe5" 3626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0ea2\\ufeff\\udcea\\u3085\\ua874\\u000a\\u0020\\u000b\\u200b", 3627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ufe10\\u2060\\u1a5a\\u2060\\u17e4\\ufffc\\ubbe1\\ufe15\\u0020\\u00a0", 3628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2060\\u2213\\u200b\\u2019\\uc2dc\\uff6a\\u1736\\u0085\\udb07", 3629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 3630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int loop; 3631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 3632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 3633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) { 3636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // printf("looping %d\n", loop); 3637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t t = u_unescape(strlist[loop], str, STRSIZE); 3638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t >= STRSIZE) { 3639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(FALSE); 3640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString 
ustr(str); 3645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBILineMonkey monkey; 3646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(monkey.deferredStatus)) { 3647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const int EXPECTEDSIZE = 50; 3651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int expected[EXPECTEDSIZE]; 3652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int expectedcount = 0; 3653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru monkey.setText(ustr); 3655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 3656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) { 3657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedcount >= EXPECTEDSIZE) { 3658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(expectedcount < EXPECTEDSIZE); 3659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected[expectedcount ++] = i; 3662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testBreakBoundPreceding(this, ustr, bi, expected, expectedcount); 3665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 3667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 3668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru} 3669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestSentBreaks(void) 3671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 3672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 3673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Locale locale("en"); 3674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 3675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *bi = BreakIterator::createSentenceInstance(locale, status); 3676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar str[200]; 3677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *strlist[] = 3678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 3679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Now\ris\nthe\r\ntime\n\rfor\r\r", 3680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "This\n", 3681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Hello! how are you? I'am fine. Thankyou. How are you doing? This\n costs $20,00,000.", 3682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\"Sentence ending with a quote.\" Bye.", 3683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " (This is it). Testing the sentence iterator. \"This isn't it.\"", 3684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Hi! This is a simple sample sentence. (This is it.) This is a simple sample sentence. \"This isn't it.\"", 3685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Hi! This is a simple sample sentence. It does not have to make any sense as you can see. ", 3686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. 
", 3687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Che la dritta via aveo smarrita. He said, that I said, that you said!! ", 3688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Don't rock the boat.\\u2029Because I am the daddy, that is why. Not on my time (el timo.)!", 3689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001040a\\u203a\\u1217\\u2b23\\u000d\\uff3b\\u03dd\\uff57\\u0a69\\u104a\\ufe56\\ufe52" 3690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u3016\\U000e002f\\U000e0077\\u0662\\u1680\\u2984\\U000e006a\\u002e\\ua6ab\\u104a" 3691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u002e\\u019b\\u2005\\u002e\\u0477\\u0438\\u0085\\u0441\\u002e\\u5f61\\u202f" 3692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001019f\\uff08\\u27e8\\u055c\\u0352", 3693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1f3e\\u004d\\u000a\\ua3e4\\U000e0023\\uff63\\u0c52\\u276d\\U0001d5de\\U0001d171" 3694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0e38\\u17e5\\U00012fe6\\u0fa9\\u267f\\u1da3\\u0046\\u03ed\\udc72\\u0030" 3695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d688\\u0b6d\\u0085\\u0c67\\u1f94\\u0c6c\\u9cb2\\u202a\\u180e\\u000b" 3696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u002e\\U000e005e\\u035b\\u061f\\u02c1\\U000e0025\\u0357\\u0969\\u202b" 3697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U000130c5\\u0486\\U000e0123\\u2019\\u01bc\\u2006\\u11ad\\u180e\\u2e05" 3698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u10b7\\u013e\\u000a\\u002e\\U00013ea4" 3699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 3700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int loop; 3701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 37026d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Creation of break 
iterator failed %s", u_errorName(status)); 3703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (loop = 0; loop < (int)(sizeof(strlist) / sizeof(char *)); loop ++) { 3706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_unescape(strlist[loop], str, (int32_t)(sizeof(str) / sizeof(str[0]))); 3707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString ustr(str); 3708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBISentMonkey monkey; 3710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(monkey.deferredStatus)) { 3711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const int EXPECTEDSIZE = 50; 3715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int expected[EXPECTEDSIZE]; 3716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int expectedcount = 0; 3717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru monkey.setText(ustr); 3719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 3720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i != BreakIterator::DONE; i = monkey.next(i)) { 3721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedcount >= EXPECTEDSIZE) { 3722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(expectedcount < EXPECTEDSIZE); 3723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
3725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected[expectedcount ++] = i; 3726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testBreakBoundPreceding(this, ustr, bi, expected, expectedcount); 3729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 3731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 3732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestMonkey(char *params) { 3735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 3736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 3738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t loopCount = 500; 3739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t seed = 1; 3740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString breakType = "all"; 3741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Locale locale("en"); 3742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool useUText = FALSE; 3743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (quick == FALSE) { 3745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loopCount = 10000; 3746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (params) { 
3749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString p(params); 3750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loopCount = getIntParam("loop", p, loopCount); 3751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru seed = getIntParam("seed", p, seed); 3752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m(" *type *= *(char|word|line|sent|title) *", p, 0, status); 3754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (m.find()) { 3755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru breakType = m.group(1, status); 3756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m.reset(); 3757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p = m.replaceFirst("", status); 3758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher u(" *utext", p, 0, status); 3761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u.find()) { 3762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru useUText = TRUE; 3763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u.reset(); 3764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p = u.replaceFirst("", status); 3765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // m.reset(p); 3769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (RegexMatcher(UNICODE_STRING_SIMPLE("\\S"), p, 0, status).find()) { 3770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Each option is stripped out of the option string as it is processed. 
3771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // All options have been checked. The option string should have been completely emptied.. 3772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buf[100]; 3773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p.extract(buf, sizeof(buf), NULL, status); 3774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf[sizeof(buf)-1] = 0; 3775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Unrecognized or extra parameter: %s\n", buf); 3776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (breakType == "char" || breakType == "all") { 3782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBICharMonkey m; 3783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status); 3784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 3785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RunMonkey(bi, m, "char", seed, loopCount, useUText); 3786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (breakType == "all" && useUText==FALSE) { 3787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Also run a quick test with UText when "all" is specified 3788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RunMonkey(bi, m, "char", seed, loopCount, TRUE); 3789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 
37926d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Creation of character break iterator failed %s", u_errorName(status)); 3793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 3795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (breakType == "word" || breakType == "all") { 3798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Word Break Monkey Test"); 3799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBIWordMonkey m; 3800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *bi = BreakIterator::createWordInstance(locale, status); 3801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 3802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RunMonkey(bi, m, "word", seed, loopCount, useUText); 3803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 38056d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Creation of word break iterator failed %s", u_errorName(status)); 3806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 3808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (breakType == "line" || breakType == "all") { 3811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Line Break Monkey Test"); 3812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBILineMonkey m; 3813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *bi = 
BreakIterator::createLineInstance(locale, status); 3814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (loopCount >= 10) { 3815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loopCount = loopCount / 5; // Line break runs slower than the others. 3816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 3818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RunMonkey(bi, m, "line", seed, loopCount, useUText); 3819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 38216d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status)); 3822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 3824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (breakType == "sent" || breakType == "all" ) { 3827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Sentence Break Monkey Test"); 3828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RBBISentMonkey m; 3829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru BreakIterator *bi = BreakIterator::createSentenceInstance(locale, status); 3830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (loopCount >= 10) { 3831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loopCount = loopCount / 10; // Sentence runs slower than the other break types 3832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 3834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
RunMonkey(bi, m, "sentence", seed, loopCount, useUText); 3835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 38376d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status)); 3838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bi; 3840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 3843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 3846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Run a RBBI monkey test. Common routine, for all break iterator types. 3847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Parameters: 3848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// bi - the break iterator to use 3849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// mk - MonkeyKind, abstraction for obtaining expected results 3850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// name - Name of test (char, word, etc.) 
for use in error messages 3851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// seed - Seed for starting random number generator (parameter from user) 3852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// numIterations 3853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 3854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, uint32_t seed, 3855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numIterations, UBool useUText) { 3856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 3858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const int32_t TESTSTRINGLEN = 500; 3860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString testText; 3861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numCharClasses; 3862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector *chClasses; 3863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int expected[TESTSTRINGLEN*2 + 1]; 3864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int expectedCount = 0; 3865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char expectedBreaks[TESTSTRINGLEN*2 + 1]; 3866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char forwardBreaks[TESTSTRINGLEN*2 + 1]; 3867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char reverseBreaks[TESTSTRINGLEN*2+1]; 3868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char isBoundaryBreaks[TESTSTRINGLEN*2+1]; 3869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char followingBreaks[TESTSTRINGLEN*2+1]; 3870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char precedingBreaks[TESTSTRINGLEN*2+1]; 
3871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 3872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int loopCount = 0; 3873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m_seed = seed; 3875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru numCharClasses = mk.charClasses()->size(); 3877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chClasses = mk.charClasses(); 3878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check for errors that occured during the construction of the MonkeyKind object. 3880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Can't report them where they occured because errln() is a method coming from intlTest, 3881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and is not visible outside of RBBITest :-( 3882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(mk.deferredStatus)) { 3883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("status of \"%s\" in creation of RBBIMonkeyKind.", u_errorName(mk.deferredStatus)); 3884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Verify that the character classes all have at least one member. 
3888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<numCharClasses; i++) { 3889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *s = (UnicodeSet *)chClasses->elementAt(i); 3890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s == NULL || s->size() == 0) { 3891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Character Class #%d is null or of zero size.", i); 3892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (loopCount < numIterations || numIterations == -1) { 3897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (numIterations == -1 && loopCount % 10 == 0) { 3898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If test is running in an infinite loop, display a periodic tic so 3899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we can tell that it is making progress. 3900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "."); 3901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save current random number seed, so that we can recreate the random numbers 3903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for this loop iteration in event of an error. 3904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru seed = m_seed; 3905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Populate a test string with data. 
3907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testText.truncate(0); 3908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<TESTSTRINGLEN; i++) { 3909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t aClassNum = m_rand() % numCharClasses; 3910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *classSet = (UnicodeSet *)chClasses->elementAt(aClassNum); 3911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t charIdx = m_rand() % classSet->size(); 3912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = classSet->charAt(charIdx); 3913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < 0) { // TODO: deal with sets containing strings. 3914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("c < 0"); 3915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testText.append(c); 3918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Calculate the expected results for this test string. 
3921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mk.setText(testText); 3922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memset(expectedBreaks, 0, sizeof(expectedBreaks)); 3923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedBreaks[0] = 1; 3924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t breakPos = 0; 3925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedCount = 0; 3926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 3927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru breakPos = mk.next(breakPos); 3928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (breakPos == -1) { 3929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (breakPos > testText.length()) { 3932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("breakPos > testText.length()"); 3933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectedBreaks[breakPos] = 1; 3935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(expectedCount<testText.length()); 3936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected[expectedCount ++] = breakPos; 3937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Find the break positions using forward iteration 3940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memset(forwardBreaks, 0, sizeof(forwardBreaks)); 3941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (useUText) { 3942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 
3943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UText *testUText = utext_openReplaceable(NULL, &testText, &status); 3944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // testUText = utext_openUnicodeString(testUText, &testText, &status); 3945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi->setText(testUText, status); 3946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 3947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utext_close(testUText); // The break iterator does a shallow clone of the UText 3948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This UText can be closed immediately, so long as the 3949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // testText string continues to exist. 3950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi->setText(testText); 3952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=bi->first(); i != BreakIterator::DONE; i=bi->next()) { 3955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i < 0 || i > testText.length()) { 3956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("%s break monkey test: Out of range value returned by breakIterator::next()", name); 3957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru forwardBreaks[i] = 1; 3960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Find the break positions using reverse iteration 
3963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memset(reverseBreaks, 0, sizeof(reverseBreaks)); 3964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=bi->last(); i != BreakIterator::DONE; i=bi->previous()) { 3965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i < 0 || i > testText.length()) { 3966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("%s break monkey test: Out of range value returned by breakIterator::next()", name); 3967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reverseBreaks[i] = 1; 3970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Find the break positions using isBoundary() tests. 3973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memset(isBoundaryBreaks, 0, sizeof(isBoundaryBreaks)); 3974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT((int32_t)sizeof(isBoundaryBreaks) > testText.length()); 3975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<=testText.length(); i++) { 3976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isBoundaryBreaks[i] = bi->isBoundary(i); 3977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Find the break positions using the following() function. 
3981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // printf("."); 3982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memset(followingBreaks, 0, sizeof(followingBreaks)); 3983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lastBreakPos = 0; 3984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru followingBreaks[0] = 1; 3985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<testText.length(); i++) { 3986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru breakPos = bi->following(i); 3987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (breakPos <= i || 3988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru breakPos < lastBreakPos || 3989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru breakPos > testText.length() || 399027f654740f2a26ad62a5c155af9199af9e69b889claireho (breakPos > lastBreakPos && lastBreakPos > i)) { 3991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("%s break monkey test: " 3992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Out of range value returned by BreakIterator::following().\n" 3993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Random seed=%d index=%d; following returned %d; lastbreak=%d", 3994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name, seed, i, breakPos, lastBreakPos); 3995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru followingBreaks[breakPos] = 1; 3998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lastBreakPos = breakPos; 3999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Find the break positions using the preceding() function. 
4002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru memset(precedingBreaks, 0, sizeof(precedingBreaks)); 4003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lastBreakPos = testText.length(); 4004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru precedingBreaks[testText.length()] = 1; 4005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=testText.length(); i>0; i--) { 4006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru breakPos = bi->preceding(i); 4007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (breakPos >= i || 4008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru breakPos > lastBreakPos || 400927f654740f2a26ad62a5c155af9199af9e69b889claireho (breakPos < 0 && testText.getChar32Start(i)>0) || 401027f654740f2a26ad62a5c155af9199af9e69b889claireho (breakPos < lastBreakPos && lastBreakPos < testText.getChar32Start(i)) ) { 4011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("%s break monkey test: " 4012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Out of range value returned by BreakIterator::preceding().\n" 4013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "index=%d; prev returned %d; lastBreak=%d" , 4014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name, i, breakPos, lastBreakPos); 4015c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (breakPos >= 0 && breakPos < (int32_t)sizeof(precedingBreaks)) { 4016c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru precedingBreaks[i] = 2; // Forces an error. 
4017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (breakPos >= 0) { 4020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru precedingBreaks[breakPos] = 1; 4021c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lastBreakPos = breakPos; 4023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compare the expected and actual results. 4027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<=testText.length(); i++) { 4028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *errorType = NULL; 4029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (forwardBreaks[i] != expectedBreaks[i]) { 4030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorType = "next()"; 4031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (reverseBreaks[i] != forwardBreaks[i]) { 4032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorType = "previous()"; 4033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (isBoundaryBreaks[i] != expectedBreaks[i]) { 4034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorType = "isBoundary()"; 4035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (followingBreaks[i] != expectedBreaks[i]) { 4036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorType = "following()"; 4037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (precedingBreaks[i] != expectedBreaks[i]) { 4038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorType = 
"preceding()"; 4039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (errorType != NULL) { 4043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Format a range of the test text that includes the failure as 4044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a data item that can be included in the rbbi test data file. 4045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Start of the range is the last point where expected and actual results 4047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // both agreed that there was a break position. 4048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int startContext = i; 4049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count = 0; 4050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 4051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (startContext==0) { break; } 4052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru startContext --; 4053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedBreaks[startContext] != 0) { 4054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (count == 2) break; 4055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count ++; 4056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // End of range is two expected breaks past the start position. 
4060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int endContext = i + 1; 4061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int ci; 4062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (ci=0; ci<2; ci++) { // Number of items to include in error text. 4063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 4064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (endContext >= testText.length()) {break;} 4065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expectedBreaks[endContext-1] != 0) { 4066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (count == 0) break; 4067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count --; 4068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru endContext ++; 4070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Format looks like "<data>\\\uabcd\uabcd\\\U0001abcd...</data>" 4074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString errorText = "<data>"; 4075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /***if (strcmp(errorType, "next()") == 0) { 4076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru startContext = 0; 4077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru endContext = testText.length(); 4078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printStringBreaks(testText, expected, expectedCount); 4080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }***/ 4081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for 
(ci=startContext; ci<endContext;) { 4083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString hexChars("0123456789abcdef"); 4084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 4085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int bn; 4086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = testText.char32At(ci); 4087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ci == i) { 4088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is the location of the error. 4089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorText.append("<?>"); 4090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (expectedBreaks[ci] != 0) { 4091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This a non-error expected break position. 4092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorText.append("\\"); 4093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < 0x10000) { 4095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorText.append("\\u"); 4096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (bn=12; bn>=0; bn-=4) { 4097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorText.append(hexChars.charAt((c>>bn)&0xf)); 4098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorText.append("\\U"); 4101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (bn=28; bn>=0; bn-=4) { 4102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorText.append(hexChars.charAt((c>>bn)&0xf)); 4103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
4105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ci = testText.moveIndex32(ci, 1); 4106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorText.append("\\"); 4108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorText.append("</data>\n"); 4109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Output the error 4111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char charErrorTxt[500]; 4112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorText.extract(charErrorTxt, sizeof(charErrorTxt), NULL, status); 4114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charErrorTxt[sizeof(charErrorTxt)-1] = 0; 4115103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const char *badLocale = bi->getLocaleID(ULOC_ACTUAL_LOCALE, status); 4116103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 4117103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errln("%s break monkey test error [%s]. %s. Operation = %s; Random seed = %d; buf Idx = %d\n%s", 4118103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius name, badLocale, (expectedBreaks[i]? 
"break expected but not found" : "break found but not expected"), 4119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorType, seed, i, charErrorTxt); 4120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loopCount++; 4125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 4127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 412927f654740f2a26ad62a5c155af9199af9e69b889claireho 413027f654740f2a26ad62a5c155af9199af9e69b889claireho// Bug 5532. UTF-8 based UText fails in dictionary code. 413127f654740f2a26ad62a5c155af9199af9e69b889claireho// This test checks the initial patch, 413227f654740f2a26ad62a5c155af9199af9e69b889claireho// which is to just keep it from crashing. Correct word boundaries 413327f654740f2a26ad62a5c155af9199af9e69b889claireho// await a proper fix to the dictionary code. 413427f654740f2a26ad62a5c155af9199af9e69b889claireho// 413527f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RBBITest::TestBug5532(void) { 413627f654740f2a26ad62a5c155af9199af9e69b889claireho // Text includes a mixture of Thai and Latin. 
413727f654740f2a26ad62a5c155af9199af9e69b889claireho const unsigned char utf8Data[] = { 413827f654740f2a26ad62a5c155af9199af9e69b889claireho 0xE0u, 0xB8u, 0x82u, 0xE0u, 0xB8u, 0xB2u, 0xE0u, 0xB8u, 0xA2u, 0xE0u, 413927f654740f2a26ad62a5c155af9199af9e69b889claireho 0xB9u, 0x80u, 0xE0u, 0xB8u, 0x84u, 0xE0u, 0xB8u, 0xA3u, 0xE0u, 0xB8u, 414027f654740f2a26ad62a5c155af9199af9e69b889claireho 0xB7u, 0xE0u, 0xB9u, 0x88u, 0xE0u, 0xB8u, 0xADu, 0xE0u, 0xB8u, 0x87u, 414127f654740f2a26ad62a5c155af9199af9e69b889claireho 0xE0u, 0xB9u, 0x80u, 0xE0u, 0xB8u, 0xA5u, 0xE0u, 0xB9u, 0x88u, 0xE0u, 414227f654740f2a26ad62a5c155af9199af9e69b889claireho 0xB8u, 0x99u, 0xE0u, 0xB8u, 0x8Bu, 0xE0u, 0xB8u, 0xB5u, 0xE0u, 0xB8u, 414327f654740f2a26ad62a5c155af9199af9e69b889claireho 0x94u, 0xE0u, 0xB8u, 0xB5u, 0x20u, 0x73u, 0x69u, 0x6Du, 0x20u, 0x61u, 414427f654740f2a26ad62a5c155af9199af9e69b889claireho 0x75u, 0x64u, 0x69u, 0x6Fu, 0x2Fu, 0x20u, 0x4Du, 0x4Fu, 0x4Fu, 0x4Eu, 414527f654740f2a26ad62a5c155af9199af9e69b889claireho 0x20u, 0x65u, 0x63u, 0x6Cu, 0x69u, 0x70u, 0x73u, 0x65u, 0x20u, 0xE0u, 414627f654740f2a26ad62a5c155af9199af9e69b889claireho 0xB8u, 0xA3u, 0xE0u, 0xB8u, 0xB2u, 0xE0u, 0xB8u, 0x84u, 0xE0u, 0xB8u, 414727f654740f2a26ad62a5c155af9199af9e69b889claireho 0xB2u, 0x20u, 0x34u, 0x37u, 0x30u, 0x30u, 0x20u, 0xE0u, 0xB8u, 0xA2u, 414827f654740f2a26ad62a5c155af9199af9e69b889claireho 0xE0u, 0xB8u, 0xB9u, 0xE0u, 0xB9u, 0x82u, 0xE0u, 0xB8u, 0xA3u, 0x00}; 414927f654740f2a26ad62a5c155af9199af9e69b889claireho 415027f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status = U_ZERO_ERROR; 415127f654740f2a26ad62a5c155af9199af9e69b889claireho UText utext=UTEXT_INITIALIZER; 415227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUTF8(&utext, (const char *)utf8Data, -1, &status); 415327f654740f2a26ad62a5c155af9199af9e69b889claireho TEST_ASSERT_SUCCESS(status); 415427f654740f2a26ad62a5c155af9199af9e69b889claireho 415527f654740f2a26ad62a5c155af9199af9e69b889claireho BreakIterator *bi = 
BreakIterator::createWordInstance(Locale("th"), status); 415627f654740f2a26ad62a5c155af9199af9e69b889claireho TEST_ASSERT_SUCCESS(status); 415727f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_SUCCESS(status)) { 415827f654740f2a26ad62a5c155af9199af9e69b889claireho bi->setText(&utext, status); 415927f654740f2a26ad62a5c155af9199af9e69b889claireho TEST_ASSERT_SUCCESS(status); 416027f654740f2a26ad62a5c155af9199af9e69b889claireho 416127f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t breakCount = 0; 416227f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t previousBreak = -1; 416327f654740f2a26ad62a5c155af9199af9e69b889claireho for (bi->first(); bi->next() != BreakIterator::DONE; breakCount++) { 416427f654740f2a26ad62a5c155af9199af9e69b889claireho // For now, just make sure that the break iterator doesn't hang. 416527f654740f2a26ad62a5c155af9199af9e69b889claireho TEST_ASSERT(previousBreak < bi->current()); 416627f654740f2a26ad62a5c155af9199af9e69b889claireho previousBreak = bi->current(); 416727f654740f2a26ad62a5c155af9199af9e69b889claireho } 416827f654740f2a26ad62a5c155af9199af9e69b889claireho TEST_ASSERT(breakCount > 0); 416927f654740f2a26ad62a5c155af9199af9e69b889claireho } 417027f654740f2a26ad62a5c155af9199af9e69b889claireho delete bi; 417127f654740f2a26ad62a5c155af9199af9e69b889claireho utext_close(&utext); 417227f654740f2a26ad62a5c155af9199af9e69b889claireho} 417327f654740f2a26ad62a5c155af9199af9e69b889claireho 417427f654740f2a26ad62a5c155af9199af9e69b889claireho 4175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 4176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// TestDebug - A place-holder test for debugging purposes. 4177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// For putting in fragments of other tests that can be invoked 4178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// for tracing without a lot of unwanted extra stuff happening. 
4179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 4180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RBBITest::TestDebug(void) { 4181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 4182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 4183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int pos = 0; 4184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int ruleStatus = 0; 4185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RuleBasedBreakIterator* bi = 4187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); 4188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::Locale("th"), status); 4189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (RuleBasedBreakIterator *)BreakIterator::createSentenceInstance(Locale::getDefault(), status); 4190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s("\\u2008\\u002e\\udc6a\\u37cd\\u71d0\\u2048\\U000e006a\\u002e\\u0046\\ufd3f\\u000a\\u002e"); 4191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UnicodeString s("Aaa. 
Bcd"); 4192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s = s.unescape(); 4193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bi->setText(s); 4194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool r = bi->isBoundary(8); 4195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%s", r?"true":"false"); 4196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 4197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = bi->last(); 4198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 4199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ruleStatus = bi->getRuleStatus(); 4200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%d\t%d\n", pos, ruleStatus); 4201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = bi->previous(); 4202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (pos != BreakIterator::DONE); 4203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 4204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4206103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusvoid RBBITest::TestProperties() { 4207103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 4208103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UnicodeSet prependSet(UNICODE_STRING_SIMPLE("[:GCB=Prepend:]"), errorCode); 4209103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (!prependSet.isEmpty()) { 4210103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius errln( 4211103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "[:GCB=Prepend:] is not empty any more. 
" 4212103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "Uncomment relevant lines in source/data/brkitr/char.txt and " 4213103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius "change this test to the opposite condition."); 4214103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 4215103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} 4216103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 4217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 4218