1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)***************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Copyright (C) 1999-2010 International Business Machines Corporation
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   and others. All rights reserved.
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)***************************************************************************
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  file:  rbbi.c    Contains the implementation of the rule based break iterator
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                   runtime engine and the API implementation for
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                   class RuleBasedBreakIterator
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypeinfo.h"  // for 'typeid' to work
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_BREAK_ITERATION
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/rbbi.h"
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/schriter.h"
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchriter.h"
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/udata.h"
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uclean.h"
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "rbbidata.h"
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "rbbirb.h"
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h"
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cstring.h"
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "umutex.h"
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucln_cmn.h"
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "brkeng.h"
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uassert.h"
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvector.h"
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// if U_LOCAL_SERVICE_HOOK is defined, then localsvc.cpp is expected to be included.
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if U_LOCAL_SERVICE_HOOK
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "localsvc.h"
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef RBBI_DEBUG
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UBool fTrace = FALSE;
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The state number of the starting state
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define START_STATE 1
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The state-transition value indicating "stop"
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define STOP_STATE  0
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator)
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//=======================================================================
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// constructors
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//=======================================================================
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Constructs a RuleBasedBreakIterator that uses the already-created
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * tables object that is passed in as a parameter.
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status)
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    init();
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fData = new RBBIDataWrapper(data, status); // status checked in constructor
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {return;}
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(fData == 0) {
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_MEMORY_ALLOCATION_ERROR;
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Same as above but does not adopt memory
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    init();
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fData = new RBBIDataWrapper(data, RBBIDataWrapper::kDontAdopt, status); // status checked in constructor
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {return;}
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(fData == 0) {
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_MEMORY_ALLOCATION_ERROR;
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   Constructor   from a UDataMemory handle to precompiled break rules
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                 stored in an ICU data file.
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status)
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    init();
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fData = new RBBIDataWrapper(udm, status); // status checked in constructor
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {return;}
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(fData == 0) {
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_MEMORY_ALLOCATION_ERROR;
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   Constructor       from a set of rules supplied as a string.
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString  &rules,
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                UParseError          &parseError,
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                UErrorCode           &status)
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    init();
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {return;}
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        RBBIRuleBuilder::createRuleBasedBreakIterator(rules, &parseError, status);
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Note:  This is a bit awkward.  The RBBI ruleBuilder has a factory method that
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //        creates and returns a complete RBBI.  From here, in a constructor, we
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //        can't just return the object created by the builder factory, hence
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //        the assignment of the factory created object to "this".
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_SUCCESS(status)) {
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *this = *bi;
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete bi;
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Default Constructor.      Create an empty shell that can be set up later.
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                           Used when creating a RuleBasedBreakIterator from a set
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                           of rules.
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator() {
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    init();
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   Copy constructor.  Will produce a break iterator with the same behavior,
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                      and which iterates over the same text, as the one passed in.
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other)
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles): BreakIterator(other)
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    this->init();
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *this = other;
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Destructor
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::~RuleBasedBreakIterator() {
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // fCharIter was adopted from the outside.
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete fCharIter;
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCharIter = NULL;
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fSCharIter;
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCharIter = NULL;
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fDCharIter;
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fDCharIter = NULL;
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_close(fText);
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData != NULL) {
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fData->removeReference();
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fData = NULL;
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCachedBreakPositions) {
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_free(fCachedBreakPositions);
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fCachedBreakPositions = NULL;
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fLanguageBreakEngines) {
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete fLanguageBreakEngines;
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fLanguageBreakEngines = NULL;
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fUnhandledBreakEngine) {
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete fUnhandledBreakEngine;
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fUnhandledBreakEngine = NULL;
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Assignment operator.  Sets this iterator to have the same behavior,
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and iterate over the same text, as the one passed in.
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator&
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (this == &that) {
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return *this;
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();    // Delete break cache information
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fBreakType = that.fBreakType;
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fLanguageBreakEngines != NULL) {
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete fLanguageBreakEngines;
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fLanguageBreakEngines = NULL;   // Just rebuild for now
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // TODO: clone fLanguageBreakEngines from "that"
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fText = utext_clone(fText, that.fText, FALSE, TRUE, &status);
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete fCharIter;
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCharIter = NULL;
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (that.fCharIter != NULL ) {
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // This is a little bit tricky - it will intially appear that
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //  this->fCharIter is adopted, even if that->fCharIter was
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //  not adopted.  That's ok.
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fCharIter = that.fCharIter->clone();
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData != NULL) {
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fData->removeReference();
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fData = NULL;
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (that.fData != NULL) {
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fData = that.fData->addReference();
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return *this;
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    init()      Shared initialization routine.   Used by all the constructors.
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                Initializes all fields, leaving the object in a consistent state.
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RuleBasedBreakIterator::init() {
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status    = U_ZERO_ERROR;
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fBufferClone          = FALSE;
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fText                 = utext_openUChars(NULL, NULL, 0, &status);
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCharIter             = NULL;
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fSCharIter            = NULL;
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fDCharIter            = NULL;
248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fData                 = NULL;
249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastRuleStatusIndex  = 0;
250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastStatusIndexValid = TRUE;
251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fDictionaryCharCount  = 0;
252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fBreakType            = UBRK_WORD;  // Defaulting BreakType to word gives reasonable
253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                        //   dictionary behavior for Break Iterators that are
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                        //   built from rules.  Even better would be the ability to
255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                        //   declare the type in the rules.
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCachedBreakPositions    = NULL;
258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLanguageBreakEngines    = NULL;
259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fUnhandledBreakEngine    = NULL;
260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNumCachedBreakPositions = 0;
261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fPositionInCache         = 0;
262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef RBBI_DEBUG
264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static UBool debugInitDone = FALSE;
265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (debugInitDone == FALSE) {
266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        char *debugEnv = getenv("U_RBBIDEBUG");
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (debugEnv && uprv_strstr(debugEnv, "trace")) {
268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fTrace = TRUE;
269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        debugInitDone = TRUE;
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------
278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//    clone - Returns a newly-constructed RuleBasedBreakIterator with the same
280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//            behavior, and iterating over the same text, as this one.
281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//            Virtual function: does the right thing with subclasses.
282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------
284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)BreakIterator*
285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::clone(void) const {
286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return new RuleBasedBreakIterator(*this);
287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Equality operator.  Returns TRUE if both BreakIterators are of the
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * same class, have the same behavior, and iterate over the same text.
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool
294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (typeid(*this) != typeid(that)) {
296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that;
300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!utext_equals(fText, that2.fText)) {
302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // The two break iterators are operating on different text,
303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //   or have a different interation position.
304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // TODO:  need a check for when in a dictionary region at different offsets.
308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (that2.fData == fData ||
310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (fData != NULL && that2.fData != NULL && *that2.fData == *fData)) {
311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // The two break iterators are using the same rules.
312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return TRUE;
313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return FALSE;
315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Compute a hash code for this BreakIterator
319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return A hash code
320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t
322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::hashCode(void) const {
323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t   hash = 0;
324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData != NULL) {
325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        hash = fData->hashCode();
326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return hash;
328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) {
332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();
336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fText = utext_clone(fText, ut, FALSE, TRUE, &status);
337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Set up a dummy CharacterIterator to be returned if anyone
339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   calls getText().  With input from UText, there is no reasonable
340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   way to return a characterIterator over the actual input text.
341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   Return one over an empty string instead - this is the closest
342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   we can come to signaling a failure.
343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   (GetText() is obsolete, this failure is sort of OK)
344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fDCharIter == NULL) {
345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        static const UChar c = 0;
346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fDCharIter = new UCharCharacterIterator(&c, 0);
347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fDCharIter == NULL) {
348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            status = U_MEMORY_ALLOCATION_ERROR;
349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // existing fCharIter was adopted from the outside.  Delete it now.
355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete fCharIter;
356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCharIter = fDCharIter;
358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    this->first();
360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const {
364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UText *result = utext_clone(fillIn, fText, FALSE, TRUE, &status);
365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the description used to create this iterator
372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const UnicodeString&
374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::getRules() const {
375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData != NULL) {
376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return fData->getRuleSourceString();
377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        static const UnicodeString *s;
379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (s == NULL) {
380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // TODO:  something more elegant here.
381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //        perhaps API should return the string by value.
382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //        Note:  thread unsafe init & leak are semi-ok, better than
383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //               what was before.  Sould be cleaned up, though.
384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s = new UnicodeString;
385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return *s;
387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//=======================================================================
391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// BreakIterator overrides
392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//=======================================================================
393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return a CharacterIterator over the text being analyzed.
396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)CharacterIterator&
398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::getText() const {
399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return *fCharIter;
400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set the iterator to analyze a new piece of text.  This function resets
404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the current iteration position to the beginning of the text.
405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param newText An iterator over the text to analyze.
406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::adoptText(CharacterIterator* newText) {
409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // If we are holding a CharacterIterator adopted from a
410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   previous call to this function, delete it now.
411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete fCharIter;
413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCharIter = newText;
416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();
418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (newText==NULL || newText->startIndex() != 0) {
419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // startIndex !=0 wants to be an error, but there's no way to report it.
420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Make the iterator text be an empty string.
421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fText = utext_openUChars(fText, NULL, 0, &status);
422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fText = utext_openCharacterIterator(fText, newText, &status);
424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    this->first();
426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set the iterator to analyze a new piece of text.  This function resets
430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the current iteration position to the beginning of the text.
431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param newText An iterator over the text to analyze.
432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::setText(const UnicodeString& newText) {
435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();
437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fText = utext_openConstUnicodeString(fText, &newText, &status);
438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Set up a character iterator on the string.
440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   Needed in case someone calls getText().
441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Can not, unfortunately, do this lazily on the (probably never)
442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  call to getText(), because getText is const.
443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fSCharIter == NULL) {
444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fSCharIter = new StringCharacterIterator(newText);
445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fSCharIter->setText(newText);
447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) {
450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // old fCharIter was adopted from the outside.  Delete it.
451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete fCharIter;
452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCharIter = fSCharIter;
454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    this->first();
456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sets the current iteration position to the beginning of the text.
462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The offset of the beginning of the text.
463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::first(void) {
465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();
466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastRuleStatusIndex  = 0;
467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastStatusIndexValid = TRUE;
468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //if (fText == NULL)
469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //    return BreakIterator::DONE;
470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_setNativeIndex(fText, 0);
472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return 0;
473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sets the current iteration position to the end of the text.
477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The text's past-the-end offset.
478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::last(void) {
480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();
481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fText == NULL) {
482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fLastRuleStatusIndex  = 0;
483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fLastStatusIndexValid = TRUE;
484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return BreakIterator::DONE;
485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastStatusIndexValid = FALSE;
488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t pos = (int32_t)utext_nativeLength(fText);
489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_setNativeIndex(fText, pos);
490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return pos;
491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Advances the iterator either forward or backward the specified number of steps.
495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Negative values move backward, and positive values move forward.  This is
496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * equivalent to repeatedly calling next() or previous().
497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param n The number of steps to move.  The sign indicates the direction
498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (negative is backwards, and positive is forwards).
499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The character offset of the boundary position n boundaries away from
500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the current one.
501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::next(int32_t n) {
503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t result = current();
504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (n > 0) {
505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result = next();
506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        --n;
507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (n < 0) {
509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result = previous();
510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ++n;
511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Advances the iterator to the next boundary position.
517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The position of the first boundary after this one.
518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::next(void) {
520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if we have cached break positions and we're still in the range
521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // covered by them, just move one step forward in the cache
522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCachedBreakPositions != NULL) {
523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fPositionInCache < fNumCachedBreakPositions - 1) {
524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ++fPositionInCache;
525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t pos = fCachedBreakPositions[fPositionInCache];
526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utext_setNativeIndex(fText, pos);
527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return pos;
528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else {
530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            reset();
531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t startPos = current();
535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t result = handleNext(fData->fForwardTable);
536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fDictionaryCharCount > 0) {
537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result = checkDictionary(startPos, result, FALSE);
538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Advances the iterator backwards, to the last boundary preceding this one.
544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The position of the last boundary position preceding this one.
545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::previous(void) {
547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t result;
548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t startPos;
549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if we have cached break positions and we're still in the range
551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // covered by them, just move one step backward in the cache
552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCachedBreakPositions != NULL) {
553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fPositionInCache > 0) {
554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            --fPositionInCache;
555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // If we're at the beginning of the cache, need to reevaluate the
556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // rule status
557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fPositionInCache <= 0) {
558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fLastStatusIndexValid = FALSE;
559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t pos = fCachedBreakPositions[fPositionInCache];
561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utext_setNativeIndex(fText, pos);
562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return pos;
563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else {
565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            reset();
566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if we're already sitting at the beginning of the text, return DONE
570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fText == NULL || (startPos = current()) == 0) {
571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fLastRuleStatusIndex  = 0;
572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fLastStatusIndexValid = TRUE;
573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return BreakIterator::DONE;
574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData->fSafeRevTable != NULL || fData->fSafeFwdTable != NULL) {
577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result = handlePrevious(fData->fReverseTable);
578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fDictionaryCharCount > 0) {
579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            result = checkDictionary(result, startPos, TRUE);
580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return result;
582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // old rule syntax
585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // set things up.  handlePrevious() will back us up to some valid
586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // break position before the current position (we back our internal
587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // iterator up one step to prevent handlePrevious() from returning
588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the current position), but not necessarily the last one before
589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // where we started
591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t start = current();
593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTEXT_PREVIOUS32(fText);
595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t lastResult    = handlePrevious(fData->fReverseTable);
596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (lastResult == UBRK_DONE) {
597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        lastResult = 0;
598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utext_setNativeIndex(fText, 0);
599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    result = lastResult;
601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t lastTag       = 0;
602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool   breakTagValid = FALSE;
603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // iterate forward from the known break position until we pass our
605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // starting point.  The last break position before the starting
606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // point is our return value
607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (;;) {
609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result         = next();
610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (result == BreakIterator::DONE || result >= start) {
611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        lastResult     = result;
614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        lastTag        = fLastRuleStatusIndex;
615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        breakTagValid  = TRUE;
616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // fLastBreakTag wants to have the value for section of text preceding
619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the result position that we are to return (in lastResult.)  If
620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the backwards rules overshot and the above loop had to do two or more
621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // next()s to move up to the desired return position, we will have a valid
622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // tag value. But, if handlePrevious() took us to exactly the correct result positon,
623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // we wont have a tag value for that position, which is only set by handleNext().
624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // set the current iteration position to be the last break position
626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // before where we started, and then return that value
627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_setNativeIndex(fText, lastResult);
628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastRuleStatusIndex  = lastTag;       // for use by getRuleStatus()
629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastStatusIndexValid = breakTagValid;
630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // No need to check the dictionary; it will have been handled by
632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // next()
633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return lastResult;
635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sets the iterator to refer to the first boundary position following
639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the specified position.
640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @offset The position from which to begin searching for a break position.
641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The position of the first break after the current position.
642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::following(int32_t offset) {
644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if we have cached break positions and offset is in the range
645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // covered by them, use them
646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // TODO: could use binary search
647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // TODO: what if offset is outside range, but break is not?
648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCachedBreakPositions != NULL) {
649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (offset >= fCachedBreakPositions[0]
650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                && offset < fCachedBreakPositions[fNumCachedBreakPositions - 1]) {
651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fPositionInCache = 0;
652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // We are guaranteed not to leave the array due to range test above
653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (offset >= fCachedBreakPositions[fPositionInCache]) {
654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                ++fPositionInCache;
655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t pos = fCachedBreakPositions[fPositionInCache];
657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utext_setNativeIndex(fText, pos);
658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return pos;
659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else {
661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            reset();
662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if the offset passed in is already past the end of the text,
666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // just return DONE; if it's before the beginning, return the
667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // text's starting offset
668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastRuleStatusIndex  = 0;
669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastStatusIndexValid = TRUE;
670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fText == NULL || offset >= utext_nativeLength(fText)) {
671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        last();
672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return next();
673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    else if (offset < 0) {
675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return first();
676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // otherwise, set our internal iteration position (temporarily)
679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // to the position passed in.  If this is the _beginning_ position,
680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // then we can just use next() to get our return value
681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t result = 0;
683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData->fSafeRevTable != NULL) {
685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // new rule syntax
686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utext_setNativeIndex(fText, offset);
687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // move forward one codepoint to prepare for moving back to a
688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // safe point.
689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // this handles offset being between a supplementary character
690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTEXT_NEXT32(fText);
691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // handlePrevious will move most of the time to < 1 boundary away
692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        handlePrevious(fData->fSafeRevTable);
693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t result = next();
694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (result <= offset) {
695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            result = next();
696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return result;
698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData->fSafeFwdTable != NULL) {
700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // backup plan if forward safe table is not available
701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utext_setNativeIndex(fText, offset);
702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTEXT_PREVIOUS32(fText);
703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // handle next will give result >= offset
704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        handleNext(fData->fSafeFwdTable);
705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // previous will give result 0 or 1 boundary away from offset,
706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // most of the time
707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // we have to
708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t oldresult = previous();
709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (oldresult > offset) {
710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t result = previous();
711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (result <= offset) {
712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return oldresult;
713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            oldresult = result;
715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t result = next();
717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (result <= offset) {
718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return next();
719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return result;
721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // otherwise, we have to sync up first.  Use handlePrevious() to back
723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // up to a known break position before the specified position (if
724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // we can determine that the specified position is a break position,
725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // we don't back up at all).  This may or may not be the last break
726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // position at or before our starting position.  Advance forward
727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // from here until we've passed the starting position.  The position
728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // we stop on will be the first break position after the specified one.
729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // old rule syntax
730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_setNativeIndex(fText, offset);
732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (offset==0 ||
733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (offset==1  && utext_getNativeIndex(fText)==0)) {
734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return next();
735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    result = previous();
737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (result != BreakIterator::DONE && result <= offset) {
739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result = next();
740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sets the iterator to refer to the last boundary position before the
747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * specified position.
748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @offset The position to begin searching for a break from.
749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The position of the last boundary before the starting position.
750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if we have cached break positions and offset is in the range
753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // covered by them, use them
754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCachedBreakPositions != NULL) {
755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // TODO: binary search?
756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // TODO: What if offset is outside range, but break is not?
757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (offset > fCachedBreakPositions[0]
758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                && offset <= fCachedBreakPositions[fNumCachedBreakPositions - 1]) {
759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fPositionInCache = 0;
760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (fPositionInCache < fNumCachedBreakPositions
761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                   && offset > fCachedBreakPositions[fPositionInCache])
762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                ++fPositionInCache;
763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            --fPositionInCache;
764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // If we're at the beginning of the cache, need to reevaluate the
765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // rule status
766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fPositionInCache <= 0) {
767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fLastStatusIndexValid = FALSE;
768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utext_setNativeIndex(fText, fCachedBreakPositions[fPositionInCache]);
770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return fCachedBreakPositions[fPositionInCache];
771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else {
773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            reset();
774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if the offset passed in is already past the end of the text,
778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // just return DONE; if it's before the beginning, return the
779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // text's starting offset
780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fText == NULL || offset > utext_nativeLength(fText)) {
781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // return BreakIterator::DONE;
782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return last();
783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    else if (offset < 0) {
785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return first();
786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if we start by updating the current iteration position to the
789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // position specified by the caller, we can just use previous()
790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // to carry out this operation
791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData->fSafeFwdTable != NULL) {
793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // new rule syntax
794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utext_setNativeIndex(fText, offset);
795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t newOffset = (int32_t)UTEXT_GETNATIVEINDEX(fText);
796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (newOffset != offset) {
797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Will come here if specified offset was not a code point boundary AND
798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   the underlying implmentation is using UText, which snaps any non-code-point-boundary
799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   indices to the containing code point.
800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // For breakitereator::preceding only, these non-code-point indices need to be moved
801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   up to refer to the following codepoint.
802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UTEXT_NEXT32(fText);
803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            offset = (int32_t)UTEXT_GETNATIVEINDEX(fText);
804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // TODO:  (synwee) would it be better to just check for being in the middle of a surrogate pair,
807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //        rather than adjusting the position unconditionally?
808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //        (Change would interact with safe rules.)
809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // TODO:  change RBBI behavior for off-boundary indices to match that of UText?
810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //        affects only preceding(), seems cleaner, but is slightly different.
811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTEXT_PREVIOUS32(fText);
812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        handleNext(fData->fSafeFwdTable);
813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (result >= offset) {
815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            result = previous();
816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return result;
818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData->fSafeRevTable != NULL) {
820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // backup plan if forward safe table is not available
821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //  TODO:  check whether this path can be discarded
822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //         It's probably OK to say that rules must supply both safe tables
823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //            if they use safe tables at all.  We have certainly never described
824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //            to anyone how to work with just one safe table.
825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utext_setNativeIndex(fText, offset);
826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTEXT_NEXT32(fText);
827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // handle previous will give result <= offset
829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        handlePrevious(fData->fSafeRevTable);
830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // next will give result 0 or 1 boundary away from offset,
832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // most of the time
833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // we have to
834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t oldresult = next();
835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (oldresult < offset) {
836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t result = next();
837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (result >= offset) {
838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return oldresult;
839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            oldresult = result;
841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t result = previous();
843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (result >= offset) {
844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return previous();
845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return result;
847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // old rule syntax
850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_setNativeIndex(fText, offset);
851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return previous();
852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns true if the specfied position is a boundary position.  As a side
856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * effect, leaves the iterator pointing to the first boundary position at
857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or after "offset".
858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param offset the offset to check.
859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return True if "offset" is a boundary position.
860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the beginning index of the iterator is always a boundary position by definition
863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (offset == 0) {
864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        first();       // For side effects on current position, tag values.
865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return TRUE;
866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (offset == (int32_t)utext_nativeLength(fText)) {
869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        last();       // For side effects on current position, tag values.
870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return TRUE;
871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // out-of-range indexes are never boundary positions
874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (offset < 0) {
875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        first();       // For side effects on current position, tag values.
876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (offset > utext_nativeLength(fText)) {
880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        last();        // For side effects on current position, tag values.
881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // otherwise, we can use following() on the position before the specified
885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // one and return true if the position we get back is the one the user
886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // specified
887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_previous32From(fText, offset);
888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t backOne = (int32_t)UTEXT_GETNATIVEINDEX(fText);
889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool    result  = following(backOne) == offset;
890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the current iteration position.
895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The current iteration position.
896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::current(void) const {
898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t  pos = (int32_t)UTEXT_GETNATIVEINDEX(fText);
899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return pos;
900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//=======================================================================
903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// implementation
904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//=======================================================================
905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// RBBIRunMode  -  the state machine runs an extra iteration at the beginning and end
908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                 of user text.  A variable with this enum type keeps track of where we
909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                 are.  The state machine only fetches user input while in the RUN mode.
910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum RBBIRunMode {
912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBI_START,     // state machine processing is before first char of input
913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBI_RUN,       // state machine processing is in the user text
914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBI_END        // state machine processing is after end of user text.
915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------------
919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  handleNext(stateTable)
921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     This method is the actual implementation of the rbbi next() method.
922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     This method initializes the state machine to state 1
923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     and advances through the text character by character until we reach the end
924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     of the text or the state machine transitions to state 0.  We update our return
925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//     value every time the state machine passes through an accepting state.
926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------------
928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             state;
930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int16_t             category        = 0;
931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBIRunMode         mode;
932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBIStateTableRow  *row;
934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32             c;
935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             lookaheadStatus = 0;
936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             lookaheadTagIdx = 0;
937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             result          = 0;
938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             initialPosition = 0;
939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             lookaheadResult = 0;
940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool               lookAheadHardBreak = (statetable->fFlags & RBBI_LOOKAHEAD_HARD_BREAK) != 0;
941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char         *tableData       = statetable->fTableData;
942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t            tableRowLen     = statetable->fRowLen;
943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    #ifdef RBBI_DEBUG
945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fTrace) {
946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            RBBIDebugPuts("Handle Next   pos   char  state category");
947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    #endif
949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // No matter what, handleNext alway correctly sets the break tag value.
951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastStatusIndexValid = TRUE;
952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastRuleStatusIndex = 0;
953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if we're already at the end of the text, return DONE.
955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText);
956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    result          = initialPosition;
957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c               = UTEXT_NEXT32(fText);
958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData == NULL || c==U_SENTINEL) {
959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return BreakIterator::DONE;
960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Set the initial state for the state machine
963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    state = START_STATE;
964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    row = (RBBIStateTableRow *)
965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //(statetable->fTableData + (statetable->fRowLen * state));
966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (tableData + tableRowLen * state);
967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    mode     = RBBI_RUN;
970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (statetable->fFlags & RBBI_BOF_REQUIRED) {
971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        category = 2;
972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        mode     = RBBI_START;
973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // loop until we reach the end of the text or transition to state 0
977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (;;) {
979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (c == U_SENTINEL) {
980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Reached end of input string.
981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (mode == RBBI_END) {
982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // We have already run the loop one last time with the
983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //   character set to the psueudo {eof} value.  Now it is time
984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //   to unconditionally bail out.
985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (lookaheadResult > result) {
986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // We ran off the end of the string with a pending look-ahead match.
987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // Treat this as if the look-ahead condition had been met, and return
988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    //  the match at the / position from the look-ahead rule.
989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    result               = lookaheadResult;
990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    fLastRuleStatusIndex = lookaheadTagIdx;
991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    lookaheadStatus = 0;
992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Run the loop one last time with the fake end-of-input character category.
996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            mode = RBBI_END;
997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            category = 1;
998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Get the char category.  An incoming category of 1 or 2 means that
1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //      we are preset for doing the beginning or end of input, and
1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //      that we shouldn't get a category from an actual text input character.
1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (mode == RBBI_RUN) {
1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // look up the current character's character category, which tells us
1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // which column in the state table to look at.
1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Note:  the 16 in UTRIE_GET16 refers to the size of the data being returned,
1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //        not the size of the character going in, which is a UChar32.
1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //
1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UTRIE_GET16(&fData->fTrie, c, category);
1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Check the dictionary bit in the character's category.
1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //    Counter is only used by dictionary based iterators (subclasses).
1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //    Chars that need to be handled by a dictionary have a flag bit set
1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //    in their category values.
1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //
1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if ((category & 0x4000) != 0)  {
1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fDictionaryCharCount++;
1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //  And off the dictionary flag bit.
1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                category &= ~0x4000;
1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        #ifdef RBBI_DEBUG
1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fTrace) {
1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                RBBIDebugPrintf("             %4ld   ", utext_getNativeIndex(fText));
1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (0x20<=c && c<0x7f) {
1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    RBBIDebugPrintf("\"%c\"  ", c);
1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                } else {
1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    RBBIDebugPrintf("%5x  ", c);
1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                RBBIDebugPrintf("%3d  %3d\n", state, category);
1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        #endif
1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // State Transition - move machine to its next state
1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        state = row->fNextState[category];
1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        row = (RBBIStateTableRow *)
1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // (statetable->fTableData + (statetable->fRowLen * state));
1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (tableData + tableRowLen * state);
1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (row->fAccepting == -1) {
1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Match found, common case.
1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (mode != RBBI_START) {
1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fLastRuleStatusIndex = row->fTagIdx;   // Remember the break status (tag) values.
1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (row->fLookAhead != 0) {
1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (lookaheadStatus != 0
1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                && row->fAccepting == lookaheadStatus) {
1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Lookahead match is completed.
1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                result               = lookaheadResult;
1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fLastRuleStatusIndex = lookaheadTagIdx;
1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                lookaheadStatus      = 0;
1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // TODO:  make a standalone hard break in a rule work.
1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (lookAheadHardBreak) {
1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UTEXT_SETNATIVEINDEX(fText, result);
1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return result;
1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Look-ahead completed, but other rules may match further.  Continue on
1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //  TODO:  junk this feature?  I don't think it's used anywhwere.
1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                goto continueOn;
1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t  r = (int32_t)UTEXT_GETNATIVEINDEX(fText);
1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lookaheadResult = r;
1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lookaheadStatus = row->fLookAhead;
1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lookaheadTagIdx = row->fTagIdx;
1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            goto continueOn;
1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (row->fAccepting != 0) {
1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Because this is an accepting state, any in-progress look-ahead match
1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   is no longer relavant.  Clear out the pending lookahead status.
1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lookaheadStatus = 0;           // clear out any pending look-ahead match.
1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)continueOn:
1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (state == STOP_STATE) {
1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // This is the normal exit from the lookup state machine.
1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // We have advanced through the string until it is certain that no
1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   longer match is possible, no matter what characters follow.
1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Advance to the next character.
1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // If this is a beginning-of-input loop iteration, don't advance
1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    the input position.  The next iteration will be processing the
1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    first real input character.
1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (mode == RBBI_RUN) {
1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c = UTEXT_NEXT32(fText);
1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (mode == RBBI_START) {
1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                mode = RBBI_RUN;
1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // The state machine is done.  Check whether it found a match...
1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // If the iterator failed to advance in the match engine, force it ahead by one.
1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   (This really indicates a defect in the break rules.  They should always match
1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //    at least one character.)
1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (result == initialPosition) {
1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTEXT_SETNATIVEINDEX(fText, initialPosition);
1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTEXT_NEXT32(fText);
1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Leave the iterator at our result position.
1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTEXT_SETNATIVEINDEX(fText, result);
1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    #ifdef RBBI_DEBUG
1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fTrace) {
1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            RBBIDebugPrintf("result = %d\n\n", result);
1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    #endif
1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------------
1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  handlePrevious()
1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//      Iterate backwards, according to the logic of the reverse rules.
1135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//      This version handles the exact style backwards rules.
1136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//      The logic of this function is very similar to handleNext(), above.
1138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-----------------------------------------------------------------------------------
1140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable) {
1141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             state;
1142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int16_t             category        = 0;
1143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBIRunMode         mode;
1144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RBBIStateTableRow  *row;
1145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32             c;
1146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             lookaheadStatus = 0;
1147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             result          = 0;
1148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             initialPosition = 0;
1149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t             lookaheadResult = 0;
1150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool               lookAheadHardBreak = (statetable->fFlags & RBBI_LOOKAHEAD_HARD_BREAK) != 0;
1151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    #ifdef RBBI_DEBUG
1153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fTrace) {
1154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            RBBIDebugPuts("Handle Previous   pos   char  state category");
1155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    #endif
1157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // handlePrevious() never gets the rule status.
1159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Flag the status as invalid; if the user ever asks for status, we will need
1160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // to back up, then re-find the break position using handleNext(), which does
1161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // get the status value.
1162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastStatusIndexValid = FALSE;
1163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fLastRuleStatusIndex = 0;
1164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if we're already at the start of the text, return DONE.
1166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fText == NULL || fData == NULL || UTEXT_GETNATIVEINDEX(fText)==0) {
1167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return BreakIterator::DONE;
1168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Set up the starting char.
1171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText);
1172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    result          = initialPosition;
1173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c               = UTEXT_PREVIOUS32(fText);
1174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Set the initial state for the state machine
1176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    state = START_STATE;
1177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    row = (RBBIStateTableRow *)
1178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (statetable->fTableData + (statetable->fRowLen * state));
1179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    category = 3;
1180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    mode     = RBBI_RUN;
1181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (statetable->fFlags & RBBI_BOF_REQUIRED) {
1182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        category = 2;
1183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        mode     = RBBI_START;
1184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // loop until we reach the start of the text or transition to state 0
1188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (;;) {
1190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (c == U_SENTINEL) {
1191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Reached end of input string.
1192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (mode == RBBI_END) {
1193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // We have already run the loop one last time with the
1194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //   character set to the psueudo {eof} value.  Now it is time
1195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //   to unconditionally bail out.
1196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (lookaheadResult < result) {
1197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // We ran off the end of the string with a pending look-ahead match.
1198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // Treat this as if the look-ahead condition had been met, and return
1199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    //  the match at the / position from the look-ahead rule.
1200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    result               = lookaheadResult;
1201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    lookaheadStatus = 0;
1202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                } else if (result == initialPosition) {
1203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // Ran off start, no match found.
1204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // move one index one (towards the start, since we are doing a previous())
1205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UTEXT_SETNATIVEINDEX(fText, initialPosition);
1206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UTEXT_PREVIOUS32(fText);   // TODO:  shouldn't be necessary.  We're already at beginning.  Check.
1207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Run the loop one last time with the fake end-of-input character category.
1211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            mode = RBBI_END;
1212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            category = 1;
1213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
1216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Get the char category.  An incoming category of 1 or 2 means that
1217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //      we are preset for doing the beginning or end of input, and
1218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //      that we shouldn't get a category from an actual text input character.
1219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
1220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (mode == RBBI_RUN) {
1221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // look up the current character's character category, which tells us
1222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // which column in the state table to look at.
1223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Note:  the 16 in UTRIE_GET16 refers to the size of the data being returned,
1224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //        not the size of the character going in, which is a UChar32.
1225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //
1226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UTRIE_GET16(&fData->fTrie, c, category);
1227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Check the dictionary bit in the character's category.
1229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //    Counter is only used by dictionary based iterators (subclasses).
1230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //    Chars that need to be handled by a dictionary have a flag bit set
1231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //    in their category values.
1232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //
1233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if ((category & 0x4000) != 0)  {
1234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fDictionaryCharCount++;
1235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //  And off the dictionary flag bit.
1236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                category &= ~0x4000;
1237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        #ifdef RBBI_DEBUG
1241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fTrace) {
1242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                RBBIDebugPrintf("             %4d   ", (int32_t)utext_getNativeIndex(fText));
1243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (0x20<=c && c<0x7f) {
1244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    RBBIDebugPrintf("\"%c\"  ", c);
1245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                } else {
1246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    RBBIDebugPrintf("%5x  ", c);
1247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                RBBIDebugPrintf("%3d  %3d\n", state, category);
1249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        #endif
1251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // State Transition - move machine to its next state
1253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //
1254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        state = row->fNextState[category];
1255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        row = (RBBIStateTableRow *)
1256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (statetable->fTableData + (statetable->fRowLen * state));
1257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (row->fAccepting == -1) {
1259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Match found, common case.
1260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
1261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (row->fLookAhead != 0) {
1264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (lookaheadStatus != 0
1265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                && row->fAccepting == lookaheadStatus) {
1266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Lookahead match is completed.
1267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                result               = lookaheadResult;
1268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                lookaheadStatus      = 0;
1269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // TODO:  make a standalone hard break in a rule work.
1270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (lookAheadHardBreak) {
1271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UTEXT_SETNATIVEINDEX(fText, result);
1272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return result;
1273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // Look-ahead completed, but other rules may match further.  Continue on
1275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //  TODO:  junk this feature?  I don't think it's used anywhwere.
1276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                goto continueOn;
1277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t  r = (int32_t)UTEXT_GETNATIVEINDEX(fText);
1280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lookaheadResult = r;
1281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lookaheadStatus = row->fLookAhead;
1282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            goto continueOn;
1283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (row->fAccepting != 0) {
1287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Because this is an accepting state, any in-progress look-ahead match
1288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   is no longer relavant.  Clear out the pending lookahead status.
1289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            lookaheadStatus = 0;
1290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)continueOn:
1293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (state == STOP_STATE) {
1294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // This is the normal exit from the lookup state machine.
1295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // We have advanced through the string until it is certain that no
1296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //   longer match is possible, no matter what characters follow.
1297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
1298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Move (backwards) to the next character to process.
1301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // If this is a beginning-of-input loop iteration, don't advance
1302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    the input position.  The next iteration will be processing the
1303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //    first real input character.
1304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (mode == RBBI_RUN) {
1305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c = UTEXT_PREVIOUS32(fText);
1306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (mode == RBBI_START) {
1308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                mode = RBBI_RUN;
1309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // The state machine is done.  Check whether it found a match...
1314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // If the iterator failed to advance in the match engine, force it ahead by one.
1316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   (This really indicates a defect in the break rules.  They should always match
1317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //    at least one character.)
1318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (result == initialPosition) {
1319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTEXT_SETNATIVEINDEX(fText, initialPosition);
1320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTEXT_PREVIOUS32(fText);
1321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result = (int32_t)UTEXT_GETNATIVEINDEX(fText);
1322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Leave the iterator at our result position.
1325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTEXT_SETNATIVEINDEX(fText, result);
1326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    #ifdef RBBI_DEBUG
1327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fTrace) {
1328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            RBBIDebugPrintf("result = %d\n\n", result);
1329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    #endif
1331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
1332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
1336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::reset()
1337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fCachedBreakPositions) {
1339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_free(fCachedBreakPositions);
1340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fCachedBreakPositions = NULL;
1342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNumCachedBreakPositions = 0;
1343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fDictionaryCharCount = 0;
1344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fPositionInCache = 0;
1345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   getRuleStatus()   Return the break rule tag associated with the current
1352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                     iterator position.  If the iterator arrived at its current
1353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                     position by iterating forwards, the value will have been
1354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                     cached by the handleNext() function.
1355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                     If no cached status value is available, the status is
1357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                     found by doing a previous() followed by a next(), which
1358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                     leaves the iterator where it started, and computes the
1359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                     status while doing the next().
1360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RuleBasedBreakIterator::makeRuleStatusValid() {
1363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fLastStatusIndexValid == FALSE) {
1364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //  No cached status is available.
1365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fText == NULL || current() == 0) {
1366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //  At start of text, or there is no text.  Status is always zero.
1367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fLastRuleStatusIndex = 0;
1368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fLastStatusIndexValid = TRUE;
1369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //  Not at start of text.  Find status the tedious way.
1371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t pa = current();
1372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            previous();
1373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (fNumCachedBreakPositions > 0) {
1374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                reset();                // Blow off the dictionary cache
1375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t pb = next();
1377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (pa != pb) {
1378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // note: the if (pa != pb) test is here only to eliminate warnings for
1379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //       unused local variables on gcc.  Logically, it isn't needed.
1380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                U_ASSERT(pa == pb);
1381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    U_ASSERT(fLastRuleStatusIndex >= 0  &&  fLastRuleStatusIndex < fData->fStatusMaxIdx);
1385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t  RuleBasedBreakIterator::getRuleStatus() const {
1389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator *nonConstThis  = (RuleBasedBreakIterator *)this;
1390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    nonConstThis->makeRuleStatusValid();
1391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // fLastRuleStatusIndex indexes to the start of the appropriate status record
1393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //                                                 (the number of status values.)
1394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   This function returns the last (largest) of the array of status values.
1395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t  idx = fLastRuleStatusIndex + fData->fRuleStatusTable[fLastRuleStatusIndex];
1396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t  tagVal = fData->fRuleStatusTable[idx];
1397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return tagVal;
1399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::getRuleStatusVec(
1405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             int32_t *fillInVec, int32_t capacity, UErrorCode &status)
1406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
1408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
1409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator *nonConstThis  = (RuleBasedBreakIterator *)this;
1412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    nonConstThis->makeRuleStatusValid();
1413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t  numVals = fData->fRuleStatusTable[fLastRuleStatusIndex];
1414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t  numValsToCopy = numVals;
1415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (numVals > capacity) {
1416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_BUFFER_OVERFLOW_ERROR;
1417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        numValsToCopy = capacity;
1418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int i;
1420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i=0; i<numValsToCopy; i++) {
1421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fillInVec[i] = fData->fRuleStatusTable[fLastRuleStatusIndex + i + 1];
1422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return numVals;
1424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//   getBinaryRules        Access to the compiled form of the rules,
1431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                         for use by build system tools that save the data
1432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                         for standard iterator types.
1433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const uint8_t  *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) {
1436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const uint8_t  *retPtr = NULL;
1437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    length = 0;
1438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData != NULL) {
1440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        retPtr = (const uint8_t *)fData->fHeader;
1441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        length = fData->fHeader->fLength;
1442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return retPtr;
1444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  BufferClone       TODO:  In my (Andy) opinion, this function should be deprecated.
1452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                    Saving one heap allocation isn't worth the trouble.
1453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                    Cloning shouldn't be done in tight loops, and
1454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                    making the clone copy involves other heap operations anyway.
1455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                    And the application code for correctly dealing with buffer
1456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                    size problems and the eventual object destruction is ugly.
1457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)BreakIterator *  RuleBasedBreakIterator::createBufferClone(void *stackBuffer,
1460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                   int32_t &bufferSize,
1461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                   UErrorCode &status)
1462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)){
1464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
1465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  If user buffer size is zero this is a preflight operation to
1469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //    obtain the needed buffer size, allowing for worst case misalignment.
1470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (bufferSize == 0) {
1472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        bufferSize = sizeof(RuleBasedBreakIterator) + U_ALIGNMENT_OFFSET_UP(0);
1473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
1474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Check the alignment and size of the user supplied buffer.
1479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Allocate heap memory if the user supplied memory is insufficient.
1480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char    *buf   = (char *)stackBuffer;
1482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t s      = bufferSize;
1483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (stackBuffer == NULL) {
1485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        s = 0;   // Ignore size, force allocation if user didn't give us a buffer.
1486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
1488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uint32_t offsetUp = (uint32_t)U_ALIGNMENT_OFFSET_UP(buf);
1489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        s   -= offsetUp;
1490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        buf += offsetUp;
1491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (s < sizeof(RuleBasedBreakIterator)) {
1493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Not enough room in the caller-supplied buffer.
1494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Do a plain-vanilla heap based clone and return that, along with
1495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //   a warning that the clone was allocated.
1496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        RuleBasedBreakIterator *clonedBI = new RuleBasedBreakIterator(*this);
1497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (clonedBI == 0) {
1498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            status = U_MEMORY_ALLOCATION_ERROR;
1499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            status = U_SAFECLONE_ALLOCATED_WARNING;
1501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return clonedBI;
1503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //  Clone the source BI into the caller-supplied buffer.
1507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    RuleBasedBreakIterator *clone = new(buf) RuleBasedBreakIterator(*this);
1509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    clone->fBufferClone = TRUE;   // Flag to prevent deleting storage on close (From C code)
1510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return clone;
1512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  isDictionaryChar      Return true if the category lookup for this char
1518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                        indicates that it is in the set of dictionary lookup
1519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                        chars.
1520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                        This function is intended for use by dictionary based
1522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                        break iterators.
1523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*UBool RuleBasedBreakIterator::isDictionaryChar(UChar32   c) {
1526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fData == NULL) {
1527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
1528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint16_t category;
1530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTRIE_GET16(&fData->fTrie, c, category);
1531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (category & 0x4000) != 0;
1532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}*/
1533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  checkDictionary       This function handles all processing of characters in
1538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                        the "dictionary" set. It will determine the appropriate
1539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                        course of action, and possibly set up a cache in the
1540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                        process.
1541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
1544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            int32_t endPos,
1545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            UBool reverse) {
1546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Reset the old break cache first.
1547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();
1548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // note: code segment below assumes that dictionary chars are in the
1550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // startPos-endPos range
1551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // value returned should be next character in sequence
1552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if ((endPos - startPos) <= 1) {
1553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return (reverse ? startPos : endPos);
1554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Bug 5532.  The dictionary code will crash if the input text is UTF-8
1557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //      because native indexes are different from UTF-16 indexes.
1558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //      Temporary hack: skip dictionary lookup for UTF-8 encoded text.
1559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //      It wont give the right breaks, but it's better than a crash.
1560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //      Check the type of the UText by checking its pFuncs field, which
1562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //      is UText's function dispatch table.  It will be the same for all
1563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //      UTF-8 UTexts and different for any other UText type.
1564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //
1565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //      We have no other type of UText available with non-UTF-16 native indexing.
1566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //      This whole check will go away once the dictionary code is fixed.
1567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const void *utext_utf8Funcs;
1568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (utext_utf8Funcs == NULL) {
1569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Cache the UTF-8 UText function pointer value.
1570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UText tempUText = UTEXT_INITIALIZER;
1572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utext_openUTF8(&tempUText, NULL, 0, &status);
1573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utext_utf8Funcs = tempUText.pFuncs;
1574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utext_close(&tempUText);
1575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fText->pFuncs == utext_utf8Funcs) {
1577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return (reverse ? startPos : endPos);
1578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Starting from the starting point, scan towards the proposed result,
1581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // looking for the first dictionary character (which may be the one
1582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // we're on, if we're starting in the middle of a range).
1583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_setNativeIndex(fText, reverse ? endPos : startPos);
1584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (reverse) {
1585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTEXT_PREVIOUS32(fText);
1586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t rangeStart = startPos;
1589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t rangeEnd = endPos;
1590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint16_t    category;
1592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     current;
1593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
1594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UStack      breaks(status);
1595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t     foundBreakCount = 0;
1596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32     c = utext_current32(fText);
1597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTRIE_GET16(&fData->fTrie, c, category);
1599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Is the character we're starting on a dictionary character? If so, we
1601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // need to back up to include the entire run; otherwise the results of
1602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the break algorithm will differ depending on where we start. Since
1603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the result is cached and there is typically a non-dictionary break
1604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // within a small number of words, there should be little performance impact.
1605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (category & 0x4000) {
1606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (reverse) {
1607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            do {
1608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                utext_next32(fText);          // TODO:  recast to work directly with postincrement.
1609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c = utext_current32(fText);
1610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UTRIE_GET16(&fData->fTrie, c, category);
1611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } while (c != U_SENTINEL && (category & 0x4000));
1612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Back up to the last dictionary character
1613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            rangeEnd = (int32_t)UTEXT_GETNATIVEINDEX(fText);
1614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (c == U_SENTINEL) {
1615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // c = fText->last32();
1616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                //   TODO:  why was this if needed?
1617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c = UTEXT_PREVIOUS32(fText);
1618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            else {
1620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c = UTEXT_PREVIOUS32(fText);
1621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        else {
1624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            do {
1625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c = UTEXT_PREVIOUS32(fText);
1626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UTRIE_GET16(&fData->fTrie, c, category);
1627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while (c != U_SENTINEL && (category & 0x4000));
1629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Back up to the last dictionary character
1630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (c == U_SENTINEL) {
1631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // c = fText->first32();
1632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c = utext_current32(fText);
1633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            else {
1635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                utext_next32(fText);
1636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c = utext_current32(fText);
1637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            rangeStart = (int32_t)UTEXT_GETNATIVEINDEX(fText);;
1639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTRIE_GET16(&fData->fTrie, c, category);
1641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Loop through the text, looking for ranges of dictionary characters.
1644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // For each span, find the appropriate break engine, and ask it to find
1645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // any breaks within the span.
1646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Note: we always do this in the forward direction, so that the break
1647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // cache is built in the right order.
1648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (reverse) {
1649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utext_setNativeIndex(fText, rangeStart);
1650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        c = utext_current32(fText);
1651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTRIE_GET16(&fData->fTrie, c, category);
1652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(U_SUCCESS(status)) {
1654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) < rangeEnd && (category & 0x4000) == 0) {
1655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utext_next32(fText);           // TODO:  tweak for post-increment operation
1656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c = utext_current32(fText);
1657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UTRIE_GET16(&fData->fTrie, c, category);
1658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (current >= rangeEnd) {
1660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
1661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // We now have a dictionary character. Get the appropriate language object
1664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // to deal with it.
1665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        const LanguageBreakEngine *lbe = getLanguageBreakEngine(c);
1666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Ask the language object if there are any breaks. It will leave the text
1668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // pointer on the other side of its range, ready to search for the next one.
1669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (lbe != NULL) {
1670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            foundBreakCount += lbe->findBreaks(fText, rangeStart, rangeEnd, FALSE, fBreakType, breaks);
1671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Reload the loop variables for the next go-round
1674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        c = utext_current32(fText);
1675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTRIE_GET16(&fData->fTrie, c, category);
1676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // If we found breaks, build a new break cache. The first and last entries must
1679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // be the original starting and ending position.
1680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (foundBreakCount > 0) {
1681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t totalBreaks = foundBreakCount;
1682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (startPos < breaks.elementAti(0)) {
1683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            totalBreaks += 1;
1684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (endPos > breaks.peeki()) {
1686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            totalBreaks += 1;
1687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fCachedBreakPositions = (int32_t *)uprv_malloc(totalBreaks * sizeof(int32_t));
1689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fCachedBreakPositions != NULL) {
1690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t out = 0;
1691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fNumCachedBreakPositions = totalBreaks;
1692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (startPos < breaks.elementAti(0)) {
1693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fCachedBreakPositions[out++] = startPos;
1694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for (int32_t i = 0; i < foundBreakCount; ++i) {
1696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fCachedBreakPositions[out++] = breaks.elementAti(i);
1697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (endPos > fCachedBreakPositions[out-1]) {
1699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fCachedBreakPositions[out] = endPos;
1700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // If there are breaks, then by definition, we are replacing the original
1702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // proposed break by one of the breaks we found. Use following() and
1703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // preceding() to do the work. They should never recurse in this case.
1704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (reverse) {
1705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return preceding(endPos);
1706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            else {
1708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return following(startPos);
1709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // If the allocation failed, just fall through to the "no breaks found" case.
1712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // If we get here, there were no language-based breaks. Set the text pointer
1715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // to the original proposed break.
1716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utext_setNativeIndex(fText, reverse ? startPos : endPos);
1717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (reverse ? startPos : endPos);
1718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END
1721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// defined in ucln_cmn.h
1723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_NAMESPACE_QUALIFIER UStack *gLanguageBreakFactories = NULL;
1725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Release all static memory held by breakiterator.
1728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_BEGIN
1730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UBool U_CALLCONV breakiterator_cleanup_dict(void) {
1731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (gLanguageBreakFactories) {
1732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete gLanguageBreakFactories;
1733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        gLanguageBreakFactories = NULL;
1734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return TRUE;
1736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END
1738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_BEGIN
1740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void U_CALLCONV _deleteFactory(void *obj) {
1741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete (U_NAMESPACE_QUALIFIER LanguageBreakFactory *) obj;
1742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END
1744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN
1745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const LanguageBreakEngine*
1747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType)
1748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
1749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool       needsInit;
1750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode  status = U_ZERO_ERROR;
1751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UMTX_CHECK(NULL, (UBool)(gLanguageBreakFactories == NULL), needsInit);
1752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (needsInit) {
1754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UStack  *factories = new UStack(_deleteFactory, NULL, status);
1755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (factories != NULL && U_SUCCESS(status)) {
1756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status);
1757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            factories->push(builtIn, status);
1758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef U_LOCAL_SERVICE_HOOK
1759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status);
1760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (extra != NULL) {
1761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                factories->push(extra, status);
1762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
1764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        umtx_lock(NULL);
1766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (gLanguageBreakFactories == NULL) {
1767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            gLanguageBreakFactories = factories;
1768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            factories = NULL;
1769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR_DICT, breakiterator_cleanup_dict);
1770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        umtx_unlock(NULL);
1772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete factories;
1773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (gLanguageBreakFactories == NULL) {
1776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
1777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i = gLanguageBreakFactories->size();
1780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const LanguageBreakEngine *lbe = NULL;
1781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (--i >= 0) {
1782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i));
1783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        lbe = factory->getEngineFor(c, breakType);
1784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (lbe != NULL) {
1785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
1786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return lbe;
1789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//  getLanguageBreakEngine  Find an appropriate LanguageBreakEngine for the
1795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//                          the characer c.
1796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//
1797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------
1798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const LanguageBreakEngine *
1799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
1800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const LanguageBreakEngine *lbe = NULL;
1801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
1802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fLanguageBreakEngines == NULL) {
1804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fLanguageBreakEngines = new UStack(status);
1805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (fLanguageBreakEngines == NULL || U_FAILURE(status)) {
1806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            delete fLanguageBreakEngines;
1807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fLanguageBreakEngines = 0;
1808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return NULL;
1809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i = fLanguageBreakEngines->size();
1813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (--i >= 0) {
1814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
1815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (lbe->handles(c, fBreakType)) {
1816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return lbe;
1817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // No existing dictionary took the character. See if a factory wants to
1821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // give us a new LanguageBreakEngine for this character.
1822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    lbe = getLanguageBreakEngineFromFactory(c, fBreakType);
1823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // If we got one, use it and push it on our stack.
1825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (lbe != NULL) {
1826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fLanguageBreakEngines->push((void *)lbe, status);
1827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Even if we can't remember it, we can keep looking it up, so
1828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // return it even if the push fails.
1829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return lbe;
1830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // No engine is forthcoming for this character. Add it to the
1833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // reject set. Create the reject break engine if needed.
1834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (fUnhandledBreakEngine == NULL) {
1835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fUnhandledBreakEngine = new UnhandledEngine(status);
1836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) {
1837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            status = U_MEMORY_ALLOCATION_ERROR;
1838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Put it last so that scripts for which we have an engine get tried
1840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // first.
1841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fLanguageBreakEngines->insertElementAt(fUnhandledBreakEngine, 0, status);
1842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // If we can't insert it, or creation failed, get rid of it
1843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(status)) {
1844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            delete fUnhandledBreakEngine;
1845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fUnhandledBreakEngine = 0;
1846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return NULL;
1847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Tell the reject engine about the character; at its discretion, it may
1851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // add more than just the one character.
1852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fUnhandledBreakEngine->handleCharacter(c, fBreakType);
1853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fUnhandledBreakEngine;
1855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*int32_t RuleBasedBreakIterator::getBreakType() const {
1860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fBreakType;
1861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}*/
1862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RuleBasedBreakIterator::setBreakType(int32_t type) {
1864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fBreakType = type;
1865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();
1866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END
1869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1871