1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ******************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (C) 1998-2003, 2006, International Business Machines Corporation *
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and others. All Rights Reserved.                                           *
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ******************************************************************************
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <errno.h>
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <stdio.h>
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <string.h>
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchar.h"
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchriter.h"
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/brkiter.h"
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/locid.h"
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/unistr.h"
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uniset.h"
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ustring.h"
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This program takes a Unicode text file containing Thai text with
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * spaces inserted where the word breaks are. It computes a copy of
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the text without spaces and uses a word instance of a Thai BreakIterator
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * to compute the word breaks. The program reports any differences in the
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * breaks.
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * NOTE: by its very nature, Thai word breaking is not exact, so it is
 * expected that this program will always report some differences.
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This class is a break iterator that counts words and spaces.
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class SpaceBreakIterator
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // The constructor:
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // text  - pointer to an array of UChars to iterate over
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // count - the number of UChars in text
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    SpaceBreakIterator(const UChar *text, int32_t count);
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the destructor
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ~SpaceBreakIterator();
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // return next break position
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t next();
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // return current word count
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t getWordCount();
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // return current space count
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t getSpaceCount();
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // No arg constructor: private so clients can't call it.
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    SpaceBreakIterator();
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // The underlying BreakIterator
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *fBreakIter;
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // address of the UChar array
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UChar *fText;
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // number of UChars in fText
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t fTextCount;
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // current word count
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t fWordCount;
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // current space count
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t fSpaceCount;
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // UnicodeSet of SA characters
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet fComplexContext;
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // true when fBreakIter has returned DONE
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool fDone;
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This is the main class. It compares word breaks and reports the differences.
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class ThaiWordbreakTest
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // The main constructor:
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // spaces       - pointer to a UChar array for the text with spaces
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // spaceCount   - the number of characters in the spaces array
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // noSpaces     - pointer to a UChar array for the text without spaces
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // noSpaceCount - the number of characters in the noSpaces array
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // verbose      - report all breaks if true, otherwise just report differences
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ThaiWordbreakTest(const UChar *spaces, int32_t spaceCount, const UChar *noSpaces, int32_t noSpaceCount, UBool verbose);
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ~ThaiWordbreakTest();
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // returns the number of breaks that are in the spaces array
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // but aren't found in the noSpaces array
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t getBreaksNotFound();
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // returns the number of breaks which are found in the noSpaces
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // array but aren't in the spaces array
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t getInvalidBreaks();
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // returns the number of words found in the spaces array
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t getWordCount();
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // reads the input Unicode text file:
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // fileName  - the path name of the file
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // charCount - set to the number of UChars read from the file
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // returns   - the address of the UChar array containing the characters
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar *readFile(char *fileName, int32_t &charCount);
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // removes spaces form the input UChar array:
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // spaces        - pointer to the input UChar array
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // count         - number of UChars in the spaces array
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // nonSpaceCount - the number of UChars in the result array
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // returns       - the address of the UChar array with spaces removed
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar *crunchSpaces(const UChar *spaces, int32_t count, int32_t &nonSpaceCount);
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // The no arg constructor - private so clients can't call it
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ThaiWordbreakTest();
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // This does the actual comparison:
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // spaces - the address of the UChar array for the text with spaces
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // spaceCount - the number of UChars in the spaces array
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // noSpaces   - the address of the UChar array for the text without spaces
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // noSpaceCount - the number of UChars in the noSpaces array
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // returns      - true if all breaks match, FALSE otherwise
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool compareWordBreaks(const UChar *spaces, int32_t spaceCount,
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            const UChar *noSpaces, int32_t noSpaceCount);
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // helper method to report a break in the spaces
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // array that's not found in the noSpaces array
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void breakNotFound(int32_t br);
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // helper method to report a break that's found in
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the noSpaces array that's not in the spaces array
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void foundInvalidBreak(int32_t br);
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // count of breaks in the spaces array that
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // aren't found in the noSpaces array
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t fBreaksNotFound;
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // count of breaks found in the noSpaces array
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // that aren't in the spaces array
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t fInvalidBreaks;
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // number of words found in the spaces array
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t fWordCount;
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // report all breaks if true, otherwise just report differences
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool fVerbose;
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The main constructor: it calls compareWordBreaks and reports any differences
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ThaiWordbreakTest::ThaiWordbreakTest(const UChar *spaces, int32_t spaceCount,
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                     const UChar *noSpaces, int32_t noSpaceCount, UBool verbose)
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles): fBreaksNotFound(0), fInvalidBreaks(0), fWordCount(0), fVerbose(verbose)
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    compareWordBreaks(spaces, spaceCount, noSpaces, noSpaceCount);
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The no arg constructor
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ThaiWordbreakTest::ThaiWordbreakTest()
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // nothing
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The destructor
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ThaiWordbreakTest::~ThaiWordbreakTest()
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // nothing?
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * returns the number of breaks in the spaces array
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * that aren't found in the noSpaces array
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)inline int32_t ThaiWordbreakTest::getBreaksNotFound()
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fBreaksNotFound;
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the number of breaks found in the noSpaces
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * array that aren't in the spaces array
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)inline int32_t ThaiWordbreakTest::getInvalidBreaks()
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fInvalidBreaks;
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the number of words found in the spaces array
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)inline int32_t ThaiWordbreakTest::getWordCount()
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fWordCount;
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This method does the acutal break comparison and reports the results.
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * It uses a SpaceBreakIterator to iterate over the text with spaces,
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and a word instance of a Thai BreakIterator to iterate over the text
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * without spaces.
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool ThaiWordbreakTest::compareWordBreaks(const UChar *spaces, int32_t spaceCount,
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                           const UChar *noSpaces, int32_t noSpaceCount)
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool result = TRUE;
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Locale thai("th");
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UCharCharacterIterator *noSpaceIter = new UCharCharacterIterator(noSpaces, noSpaceCount);
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *breakIter = BreakIterator::createWordInstance(thai, status);
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breakIter->adoptText(noSpaceIter);
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    SpaceBreakIterator spaceIter(spaces, spaceCount);
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t nextBreak = 0;
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t nextSpaceBreak = 0;
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t iterCount = 0;
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (TRUE) {
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nextSpaceBreak = spaceIter.next();
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nextBreak = breakIter->next();
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (nextSpaceBreak == BreakIterator::DONE || nextBreak == BreakIterator::DONE) {
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (nextBreak != BreakIterator::DONE) {
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fprintf(stderr, "break iterator didn't end.\n");
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else if (nextSpaceBreak != BreakIterator::DONE) {
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fprintf(stderr, "premature break iterator end.\n");
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (nextSpaceBreak != nextBreak &&
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               nextSpaceBreak != BreakIterator::DONE && nextBreak != BreakIterator::DONE) {
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (nextSpaceBreak < nextBreak) {
248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                breakNotFound(nextSpaceBreak);
249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                result = FALSE;
250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                nextSpaceBreak = spaceIter.next();
251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else if (nextSpaceBreak > nextBreak) {
252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                foundInvalidBreak(nextBreak);
253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                result = FALSE;
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                nextBreak = breakIter->next();
255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fVerbose) {
259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            printf("%d   %d\n", nextSpaceBreak, nextBreak);
260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fWordCount = spaceIter.getWordCount();
265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete breakIter;
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Report a break that's in the text with spaces but
273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * not found in the text without spaces.
274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ThaiWordbreakTest::breakNotFound(int32_t br)
276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fVerbose) {
278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        printf("%d   ****\n", br);
279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fprintf(stderr, "break not found: %d\n", br);
281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fBreaksNotFound += 1;
284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Report a break that's found in the text without spaces
288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * that isn't in the text with spaces.
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ThaiWordbreakTest::foundInvalidBreak(int32_t br)
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fVerbose) {
293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        printf("****   %d\n", br);
294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fprintf(stderr, "found invalid break: %d\n", br);
296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fInvalidBreaks += 1;
299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Read the text from a file. The text must start with a Unicode Byte
303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Order Mark (BOM) so that we know what order to read the bytes in.
304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const UChar *ThaiWordbreakTest::readFile(char *fileName, int32_t &charCount)
306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    FILE *f;
308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t fileSize;
309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar *buffer;
311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *bufferChars;
312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    f = fopen(fileName, "rb");
314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( f == NULL ) {
316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fprintf(stderr,"Couldn't open %s reason: %s \n", fileName, strerror(errno));
317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fseek(f, 0, SEEK_END);
321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fileSize = ftell(f);
322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fseek(f, 0, SEEK_SET);
324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bufferChars = new char[fileSize];
325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(bufferChars == 0) {
327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fprintf(stderr,"Couldn't get memory for reading %s reason: %s \n", fileName, strerror(errno));
328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fclose(f);
329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fread(bufferChars, sizeof(char), fileSize, f);
333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( ferror(f) ) {
334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fprintf(stderr,"Couldn't read %s reason: %s \n", fileName, strerror(errno));
335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fclose(f);
336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete[] bufferChars;
337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fclose(f);
340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString myText(bufferChars, fileSize, "UTF-8");
342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete[] bufferChars;
344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    charCount = myText.length();
346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    buffer = new UChar[charCount];
347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(buffer == 0) {
348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fprintf(stderr,"Couldn't get memory for reading %s reason: %s \n", fileName, strerror(errno));
349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    myText.extract(1, myText.length(), buffer);
353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    charCount--;  // skip the BOM
354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    buffer[charCount] = 0;    // NULL terminate for easier reading in the debugger
355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return buffer;
357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Remove spaces from the input UChar array.
361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * We check explicitly for a Unicode code value of 0x0020
363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * because Unicode::isSpaceChar returns true for CR, LF, etc.
364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const UChar *ThaiWordbreakTest::crunchSpaces(const UChar *spaces, int32_t count, int32_t &nonSpaceCount)
367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i, out, spaceCount;
369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    spaceCount = 0;
371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = 0; i < count; i += 1) {
372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (spaces[i] == 0x0020 /*Unicode::isSpaceChar(spaces[i])*/) {
373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            spaceCount += 1;
374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    nonSpaceCount = count - spaceCount;
378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar *noSpaces = new UChar[nonSpaceCount];
379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (noSpaces == 0) {
381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fprintf(stderr, "Couldn't allocate memory for the space stripped text.\n");
382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (out = 0, i = 0; i < count; i += 1) {
386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (spaces[i] != 0x0020 /*! Unicode::isSpaceChar(spaces[i])*/) {
387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            noSpaces[out++] = spaces[i];
388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return noSpaces;
392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Generate a text file with spaces in it from a file without.
396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int generateFile(const UChar *chars, int32_t length) {
398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Locale root("");
399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UCharCharacterIterator *noSpaceIter = new UCharCharacterIterator(chars, length);
400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet complexContext(UNICODE_STRING_SIMPLE("[:LineBreak=SA:]"), status);
403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    BreakIterator *breakIter = BreakIterator::createWordInstance(root, status);
404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    breakIter->adoptText(noSpaceIter);
405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char outbuf[1024];
406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t strlength;
407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar bom = 0xFEFF;
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &bom, 1, &status));
410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t prevbreak = 0;
411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (U_SUCCESS(status)) {
412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t nextbreak = breakIter->next();
413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (nextbreak == BreakIterator::DONE) {
414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        printf("%s", u_strToUTF8(outbuf, sizeof(outbuf), &strlength, &chars[prevbreak],
417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                    nextbreak-prevbreak, &status));
418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (nextbreak > 0 && complexContext.contains(chars[nextbreak-1])
419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            && complexContext.contains(chars[nextbreak])) {
420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            printf(" ");
421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        prevbreak = nextbreak;
423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fprintf(stderr, "generate failed: %s\n", u_errorName(status));
427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return status;
428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    else {
430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The main routine. Read the command line arguments, read the text file,
436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * remove the spaces, do the comparison and report the final results
437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int main(int argc, char **argv)
439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *fileName = "space.txt";
441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int arg = 1;
442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool verbose = FALSE;
443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool generate = FALSE;
444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (argc >= 2 && strcmp(argv[1], "-generate") == 0) {
446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        generate = TRUE;
447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        arg += 1;
448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (argc >= 2 && strcmp(argv[1], "-verbose") == 0) {
451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        verbose = TRUE;
452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        arg += 1;
453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (arg == argc - 1) {
456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fileName = argv[arg++];
457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (arg != argc) {
460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fprintf(stderr, "Usage: %s [-verbose] [<file>]\n", argv[0]);
461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 1;
462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t spaceCount, nonSpaceCount;
465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UChar *spaces, *noSpaces;
466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    spaces = ThaiWordbreakTest::readFile(fileName, spaceCount);
468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (spaces == 0) {
470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 1;
471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (generate) {
474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return generateFile(spaces, spaceCount);
475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    noSpaces = ThaiWordbreakTest::crunchSpaces(spaces, spaceCount, nonSpaceCount);
478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (noSpaces == 0) {
480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 1;
481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ThaiWordbreakTest test(spaces, spaceCount, noSpaces, nonSpaceCount, verbose);
484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("word count: %d\n", test.getWordCount());
486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("breaks not found: %d\n", test.getBreaksNotFound());
487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("invalid breaks found: %d\n", test.getInvalidBreaks());
488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return 0;
490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The main constructor. Clear all the counts and construct a default
494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * word instance of a BreakIterator.
495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpaceBreakIterator::SpaceBreakIterator(const UChar *text, int32_t count)
497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  : fBreakIter(0), fText(text), fTextCount(count), fWordCount(0), fSpaceCount(0), fDone(FALSE)
498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UCharCharacterIterator *iter = new UCharCharacterIterator(text, count);
500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fComplexContext.applyPattern(UNICODE_STRING_SIMPLE("[:LineBreak=SA:]"), status);
502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Locale root("");
503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fBreakIter = BreakIterator::createWordInstance(root, status);
505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fBreakIter->adoptText(iter);
506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpaceBreakIterator::SpaceBreakIterator()
509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // nothing
511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The destructor. delete the underlying BreakIterator
515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpaceBreakIterator::~SpaceBreakIterator()
517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fBreakIter;
519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the next break, counting words and spaces.
523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t SpaceBreakIterator::next()
525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fDone) {
527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return BreakIterator::DONE;
528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t nextBreak;
531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    do {
532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nextBreak = fBreakIter->next();
533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (nextBreak == BreakIterator::DONE) {
535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fDone = TRUE;
536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return BreakIterator::DONE;
537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(nextBreak > 0 && fComplexContext.contains(fText[nextBreak-1])
540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            && fComplexContext.contains(fText[nextBreak]));
541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)   int32_t result = nextBreak - fSpaceCount;
543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (nextBreak < fTextCount) {
545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fText[nextBreak] == 0x0020 /*Unicode::isSpaceChar(fText[nextBreak])*/) {
546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fSpaceCount += fBreakIter->next() - nextBreak;
547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fWordCount += 1;
551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the current space count
557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t SpaceBreakIterator::getSpaceCount()
559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fSpaceCount;
561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the current word count
565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t SpaceBreakIterator::getWordCount()
567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fWordCount;
569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
572