1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ****************************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (C) 1998-2003, 2006, International Business Machines Corporation * 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and others. All Rights Reserved. * 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ****************************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <errno.h> 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <stdio.h> 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <string.h> 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchar.h" 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchriter.h" 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/brkiter.h" 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/locid.h" 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/unistr.h" 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uniset.h" 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ustring.h" 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This program takes a Unicode text file 
containing Thai text with
 * spaces inserted where the word breaks are. It computes a copy of
 * the text without spaces and uses a word instance of a Thai BreakIterator
 * to compute the word breaks. The program reports any differences in the
 * breaks.
 *
 * NOTE: by its very nature, Thai word breaking is not exact, so it is
 * expected that this program will always report some differences.
 */

/*
 * This class is a break iterator that counts words and spaces.
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class SpaceBreakIterator 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The constructor: 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // text - pointer to an array of UChars to iterate over 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // count - the number of UChars in text 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SpaceBreakIterator(const UChar *text, int32_t count); 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the destructor 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ~SpaceBreakIterator(); 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // return next break position 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t next(); 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // return current word count 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t getWordCount(); 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // return current space count 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t getSpaceCount(); 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private: 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No arg constructor: private so clients can't call it. 
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SpaceBreakIterator(); 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The underlying BreakIterator 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *fBreakIter; 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // address of the UChar array 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *fText; 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // number of UChars in fText 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t fTextCount; 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // current word count 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t fWordCount; 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // current space count 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t fSpaceCount; 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // UnicodeSet of SA characters 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet fComplexContext; 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // true when fBreakIter has returned DONE 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool fDone; 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This is the main class. It compares word breaks and reports the differences. 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class ThaiWordbreakTest 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public: 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The main constructor: 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // spaces - pointer to a UChar array for the text with spaces 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // spaceCount - the number of characters in the spaces array 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // noSpaces - pointer to a UChar array for the text without spaces 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // noSpaceCount - the number of characters in the noSpaces array 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // verbose - report all breaks if true, otherwise just report differences 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ThaiWordbreakTest(const UChar *spaces, int32_t spaceCount, const UChar *noSpaces, int32_t noSpaceCount, UBool verbose); 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ~ThaiWordbreakTest(); 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // returns the number of breaks that are in the spaces array 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // but aren't found in the noSpaces array 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t getBreaksNotFound(); 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // returns the number of breaks which are 
found in the noSpaces 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // array but aren't in the spaces array 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t getInvalidBreaks(); 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // returns the number of words found in the spaces array 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t getWordCount(); 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // reads the input Unicode text file: 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // fileName - the path name of the file 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // charCount - set to the number of UChars read from the file 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // returns - the address of the UChar array containing the characters 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const UChar *readFile(char *fileName, int32_t &charCount); 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // removes spaces form the input UChar array: 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // spaces - pointer to the input UChar array 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // count - number of UChars in the spaces array 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // nonSpaceCount - the number of UChars in the result array 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // returns - the address of the UChar array with spaces removed 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const UChar *crunchSpaces(const UChar *spaces, int32_t count, int32_t &nonSpaceCount); 
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private: 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The no arg constructor - private so clients can't call it 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ThaiWordbreakTest(); 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This does the actual comparison: 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // spaces - the address of the UChar array for the text with spaces 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // spaceCount - the number of UChars in the spaces array 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // noSpaces - the address of the UChar array for the text without spaces 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // noSpaceCount - the number of UChars in the noSpaces array 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // returns - true if all breaks match, FALSE otherwise 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool compareWordBreaks(const UChar *spaces, int32_t spaceCount, 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *noSpaces, int32_t noSpaceCount); 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // helper method to report a break in the spaces 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // array that's not found in the noSpaces array 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void breakNotFound(int32_t br); 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // helper method to report a break that's found in 
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the noSpaces array that's not in the spaces array 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void foundInvalidBreak(int32_t br); 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // count of breaks in the spaces array that 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // aren't found in the noSpaces array 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t fBreaksNotFound; 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // count of breaks found in the noSpaces array 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // that aren't in the spaces array 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t fInvalidBreaks; 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // number of words found in the spaces array 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t fWordCount; 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // report all breaks if true, otherwise just report differences 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool fVerbose; 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The main constructor: it calls compareWordBreaks and reports any differences 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles)ThaiWordbreakTest::ThaiWordbreakTest(const UChar *spaces, int32_t spaceCount, 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *noSpaces, int32_t noSpaceCount, UBool verbose) 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles): fBreaksNotFound(0), fInvalidBreaks(0), fWordCount(0), fVerbose(verbose) 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compareWordBreaks(spaces, spaceCount, noSpaces, noSpaceCount); 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The no arg constructor 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ThaiWordbreakTest::ThaiWordbreakTest() 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // nothing 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The destructor 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ThaiWordbreakTest::~ThaiWordbreakTest() 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // nothing? 
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * returns the number of breaks in the spaces array 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * that aren't found in the noSpaces array 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)inline int32_t ThaiWordbreakTest::getBreaksNotFound() 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fBreaksNotFound; 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the number of breaks found in the noSpaces 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * array that aren't in the spaces array 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)inline int32_t ThaiWordbreakTest::getInvalidBreaks() 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fInvalidBreaks; 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the number of words found in the spaces array 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)inline int32_t 
ThaiWordbreakTest::getWordCount() 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fWordCount; 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This method does the acutal break comparison and reports the results. 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * It uses a SpaceBreakIterator to iterate over the text with spaces, 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and a word instance of a Thai BreakIterator to iterate over the text 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * without spaces. 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool ThaiWordbreakTest::compareWordBreaks(const UChar *spaces, int32_t spaceCount, 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *noSpaces, int32_t noSpaceCount) 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool result = TRUE; 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale thai("th"); 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCharCharacterIterator *noSpaceIter = new UCharCharacterIterator(noSpaces, noSpaceCount); 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *breakIter = BreakIterator::createWordInstance(thai, status); 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakIter->adoptText(noSpaceIter); 
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SpaceBreakIterator spaceIter(spaces, spaceCount); 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t nextBreak = 0; 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t nextSpaceBreak = 0; 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t iterCount = 0; 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (TRUE) { 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextSpaceBreak = spaceIter.next(); 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextBreak = breakIter->next(); 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (nextSpaceBreak == BreakIterator::DONE || nextBreak == BreakIterator::DONE) { 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (nextBreak != BreakIterator::DONE) { 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "break iterator didn't end.\n"); 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (nextSpaceBreak != BreakIterator::DONE) { 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "premature break iterator end.\n"); 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (nextSpaceBreak != nextBreak && 
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextSpaceBreak != BreakIterator::DONE && nextBreak != BreakIterator::DONE) { 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (nextSpaceBreak < nextBreak) { 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakNotFound(nextSpaceBreak); 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = FALSE; 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextSpaceBreak = spaceIter.next(); 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (nextSpaceBreak > nextBreak) { 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) foundInvalidBreak(nextBreak); 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = FALSE; 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextBreak = breakIter->next(); 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fVerbose) { 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("%d %d\n", nextSpaceBreak, nextBreak); 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fWordCount = spaceIter.getWordCount(); 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete breakIter; 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles)} 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Report a break that's in the text with spaces but 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * not found in the text without spaces. 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ThaiWordbreakTest::breakNotFound(int32_t br) 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fVerbose) { 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("%d ****\n", br); 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "break not found: %d\n", br); 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBreaksNotFound += 1; 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Report a break that's found in the text without spaces 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * that isn't in the text with spaces. 
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ThaiWordbreakTest::foundInvalidBreak(int32_t br) 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fVerbose) { 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("**** %d\n", br); 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "found invalid break: %d\n", br); 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInvalidBreaks += 1; 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Read the text from a file. The text must start with a Unicode Byte 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Order Mark (BOM) so that we know what order to read the bytes in. 
304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const UChar *ThaiWordbreakTest::readFile(char *fileName, int32_t &charCount) 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) FILE *f; 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t fileSize; 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *buffer; 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *bufferChars; 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) f = fopen(fileName, "rb"); 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( f == NULL ) { 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr,"Couldn't open %s reason: %s \n", fileName, strerror(errno)); 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fseek(f, 0, SEEK_END); 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileSize = ftell(f); 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fseek(f, 0, SEEK_SET); 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bufferChars = new char[fileSize]; 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(bufferChars == 0) { 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr,"Couldn't get memory for 
reading %s reason: %s \n", fileName, strerror(errno)); 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fclose(f); 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fread(bufferChars, sizeof(char), fileSize, f); 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( ferror(f) ) { 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr,"Couldn't read %s reason: %s \n", fileName, strerror(errno)); 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fclose(f); 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete[] bufferChars; 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fclose(f); 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString myText(bufferChars, fileSize, "UTF-8"); 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete[] bufferChars; 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charCount = myText.length(); 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buffer = new UChar[charCount]; 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(buffer == 0) { 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr,"Couldn't get memory for reading %s reason: %s \n", fileName, strerror(errno)); 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 
350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myText.extract(1, myText.length(), buffer); 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) charCount--; // skip the BOM 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buffer[charCount] = 0; // NULL terminate for easier reading in the debugger 355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return buffer; 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Remove spaces from the input UChar array. 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * We check explicitly for a Unicode code value of 0x0020 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * because Unicode::isSpaceChar returns true for CR, LF, etc. 
364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const UChar *ThaiWordbreakTest::crunchSpaces(const UChar *spaces, int32_t count, int32_t &nonSpaceCount) 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, out, spaceCount; 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) spaceCount = 0; 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i < count; i += 1) { 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (spaces[i] == 0x0020 /*Unicode::isSpaceChar(spaces[i])*/) { 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) spaceCount += 1; 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nonSpaceCount = count - spaceCount; 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *noSpaces = new UChar[nonSpaceCount]; 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (noSpaces == 0) { 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "Couldn't allocate memory for the space stripped text.\n"); 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (out = 0, i = 0; i < count; i += 1) { 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if 
(spaces[i] != 0x0020 /*! Unicode::isSpaceChar(spaces[i])*/) { 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) noSpaces[out++] = spaces[i]; 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return noSpaces; 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Generate a text file with spaces in it from a file without. 396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int generateFile(const UChar *chars, int32_t length) { 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale root(""); 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCharCharacterIterator *noSpaceIter = new UCharCharacterIterator(chars, length); 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet complexContext(UNICODE_STRING_SIMPLE("[:LineBreak=SA:]"), status); 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) BreakIterator *breakIter = BreakIterator::createWordInstance(root, status); 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakIter->adoptText(noSpaceIter); 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char outbuf[1024]; 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t strlength; 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar bom = 0xFEFF; 
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &bom, 1, &status)); 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t prevbreak = 0; 411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (U_SUCCESS(status)) { 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t nextbreak = breakIter->next(); 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (nextbreak == BreakIterator::DONE) { 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &chars[prevbreak], 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextbreak-prevbreak, &status)); 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (nextbreak > 0 && complexContext.contains(chars[nextbreak-1]) 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && complexContext.contains(chars[nextbreak])) { 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf(" "); 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevbreak = nextbreak; 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "generate failed: %s\n", u_errorName(status)); 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return status; 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The main routine. Read the command line arguments, read the text file, 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * remove the spaces, do the comparison and report the final results 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int main(int argc, char **argv) 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *fileName = "space.txt"; 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int arg = 1; 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool verbose = FALSE; 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool generate = FALSE; 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (argc >= 2 && strcmp(argv[1], "-generate") == 0) { 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) generate = TRUE; 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) arg += 1; 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (argc >= 2 && strcmp(argv[1], "-verbose") == 0) { 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) verbose = TRUE; 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
arg += 1; 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (arg == argc - 1) { 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fileName = argv[arg++]; 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (arg != argc) { 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "Usage: %s [-verbose] [<file>]\n", argv[0]); 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 1; 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t spaceCount, nonSpaceCount; 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *spaces, *noSpaces; 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) spaces = ThaiWordbreakTest::readFile(fileName, spaceCount); 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (spaces == 0) { 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 1; 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (generate) { 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return generateFile(spaces, spaceCount); 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
noSpaces = ThaiWordbreakTest::crunchSpaces(spaces, spaceCount, nonSpaceCount); 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (noSpaces == 0) { 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 1; 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ThaiWordbreakTest test(spaces, spaceCount, noSpaces, nonSpaceCount, verbose); 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("word count: %d\n", test.getWordCount()); 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("breaks not found: %d\n", test.getBreaksNotFound()); 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("invalid breaks found: %d\n", test.getInvalidBreaks()); 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The main constructor. Clear all the counts and construct a default 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * word instance of a BreakIterator. 
495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpaceBreakIterator::SpaceBreakIterator(const UChar *text, int32_t count) 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) : fBreakIter(0), fText(text), fTextCount(count), fWordCount(0), fSpaceCount(0), fDone(FALSE) 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCharCharacterIterator *iter = new UCharCharacterIterator(text, count); 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fComplexContext.applyPattern(UNICODE_STRING_SIMPLE("[:LineBreak=SA:]"), status); 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Locale root(""); 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBreakIter = BreakIterator::createWordInstance(root, status); 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBreakIter->adoptText(iter); 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpaceBreakIterator::SpaceBreakIterator() 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // nothing 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The destructor. 
delete the underlying BreakIterator 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpaceBreakIterator::~SpaceBreakIterator() 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fBreakIter; 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the next break, counting words and spaces. 523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t SpaceBreakIterator::next() 525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fDone) { 527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return BreakIterator::DONE; 528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t nextBreak; 531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextBreak = fBreakIter->next(); 533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (nextBreak == BreakIterator::DONE) { 535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDone = TRUE; 536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return BreakIterator::DONE; 537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) while(nextBreak > 0 && fComplexContext.contains(fText[nextBreak-1]) 540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && fComplexContext.contains(fText[nextBreak])); 541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = nextBreak - fSpaceCount; 543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (nextBreak < fTextCount) { 545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fText[nextBreak] == 0x0020 /*Unicode::isSpaceChar(fText[nextBreak])*/) { 546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSpaceCount += fBreakIter->next() - nextBreak; 547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fWordCount += 1; 551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the current space count 557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t SpaceBreakIterator::getSpaceCount() 559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fSpaceCount; 561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the current word count 565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t SpaceBreakIterator::getWordCount() 567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fWordCount; 569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 572