1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*************************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (C) 1999-2010 International Business Machines Corporation 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* and others. All rights reserved. 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*************************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// file: rbbi.c Contains the implementation of the rule based break iterator 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// runtime engine and the API implementation for 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// class RuleBasedBreakIterator 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypeinfo.h" // for 'typeid' to work 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_BREAK_ITERATION 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/rbbi.h" 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/schriter.h" 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchriter.h" 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles)#include "unicode/udata.h" 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uclean.h" 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "rbbidata.h" 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "rbbirb.h" 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cstring.h" 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "umutex.h" 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucln_cmn.h" 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "brkeng.h" 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uassert.h" 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvector.h" 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// if U_LOCAL_SERVICE_HOOK is defined, then localsvc.cpp is expected to be included. 
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if U_LOCAL_SERVICE_HOOK 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "localsvc.h" 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef RBBI_DEBUG 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UBool fTrace = FALSE; 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The state number of the starting state 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define START_STATE 1 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The state-transition value indicating "stop" 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define STOP_STATE 0 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator) 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//======================================================================= 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// constructors 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//======================================================================= 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Constructs a RuleBasedBreakIterator that uses the already-created 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * tables object that is passed in as a parameter. 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fData = new RBBIDataWrapper(data, status); // status checked in constructor 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) {return;} 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(fData == 0) { 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Same as above but does not adopt memory 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status) 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fData = new 
RBBIDataWrapper(data, RBBIDataWrapper::kDontAdopt, status); // status checked in constructor 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) {return;} 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(fData == 0) { 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Constructor from a UDataMemory handle to precompiled break rules 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// stored in an ICU data file. 
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status) 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fData = new RBBIDataWrapper(udm, status); // status checked in constructor 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) {return;} 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(fData == 0) { 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Constructor from a set of rules supplied as a string. 
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules, 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError &parseError, 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) {return;} 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *) 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIRuleBuilder::createRuleBasedBreakIterator(rules, &parseError, status); 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Note: This is a bit awkward. The RBBI ruleBuilder has a factory method that 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // creates and returns a complete RBBI. From here, in a constructor, we 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // can't just return the object created by the builder factory, hence 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the assignment of the factory created object to "this". 
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *this = *bi; 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete bi; 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Default Constructor. Create an empty shell that can be set up later. 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Used when creating a RuleBasedBreakIterator from a set 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// of rules. 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator() { 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Copy constructor. 
Will produce a break iterator with the same behavior, 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// and which iterates over the same text, as the one passed in. 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other) 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles): BreakIterator(other) 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) this->init(); 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *this = other; 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Destructor 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::~RuleBasedBreakIterator() { 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // fCharIter was adopted from the outside. 
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCharIter; 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCharIter = NULL; 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSCharIter; 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCharIter = NULL; 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fDCharIter; 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDCharIter = NULL; 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_close(fText); 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData != NULL) { 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fData->removeReference(); 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fData = NULL; 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCachedBreakPositions) { 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(fCachedBreakPositions); 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCachedBreakPositions = NULL; 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fLanguageBreakEngines) { 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fLanguageBreakEngines; 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLanguageBreakEngines = NULL; 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fUnhandledBreakEngine) { 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete 
fUnhandledBreakEngine; 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fUnhandledBreakEngine = NULL; 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Assignment operator. Sets this iterator to have the same behavior, 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and iterate over the same text, as the one passed in. 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator& 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) { 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (this == &that) { 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *this; 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); // Delete break cache information 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBreakType = that.fBreakType; 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fLanguageBreakEngines != NULL) { 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fLanguageBreakEngines; 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLanguageBreakEngines = NULL; // Just rebuild for now 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: clone fLanguageBreakEngines from "that" 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fText = utext_clone(fText, that.fText, FALSE, TRUE, &status); 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCharIter; 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCharIter = NULL; 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (that.fCharIter != NULL ) { 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This is a little bit tricky - it will intially appear that 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // this->fCharIter is adopted, even if that->fCharIter was 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // not adopted. That's ok. 
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCharIter = that.fCharIter->clone(); 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData != NULL) { 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fData->removeReference(); 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fData = NULL; 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (that.fData != NULL) { 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fData = that.fData->addReference(); 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *this; 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------- 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// init() Shared initialization routine. Used by all the constructors. 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Initializes all fields, leaving the object in a consistent state. 
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------- 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RuleBasedBreakIterator::init() { 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBufferClone = FALSE; 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fText = utext_openUChars(NULL, NULL, 0, &status); 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCharIter = NULL; 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSCharIter = NULL; 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDCharIter = NULL; 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fData = NULL; 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = 0; 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = TRUE; 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDictionaryCharCount = 0; 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // dictionary behavior for Break Iterators that are 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // built from rules. Even better would be the ability to 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // declare the type in the rules. 
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCachedBreakPositions = NULL; 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLanguageBreakEngines = NULL; 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fUnhandledBreakEngine = NULL; 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNumCachedBreakPositions = 0; 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPositionInCache = 0; 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef RBBI_DEBUG 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static UBool debugInitDone = FALSE; 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (debugInitDone == FALSE) { 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *debugEnv = getenv("U_RBBIDEBUG"); 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (debugEnv && uprv_strstr(debugEnv, "trace")) { 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fTrace = TRUE; 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) debugInitDone = TRUE; 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------- 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles)// clone - Returns a newly-constructed RuleBasedBreakIterator with the same 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// behavior, and iterating over the same text, as this one. 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Virtual function: does the right thing with subclasses. 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------- 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)BreakIterator* 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::clone(void) const { 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return new RuleBasedBreakIterator(*this); 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Equality operator. Returns TRUE if both BreakIterators are of the 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * same class, have the same behavior, and iterate over the same text. 
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::operator==(const BreakIterator& that) const { 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (typeid(*this) != typeid(that)) { 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that; 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!utext_equals(fText, that2.fText)) { 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The two break iterators are operating on different text, 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // or have a different interation position. 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: need a check for when in a dictionary region at different offsets. 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (that2.fData == fData || 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fData != NULL && that2.fData != NULL && *that2.fData == *fData)) { 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The two break iterators are using the same rules. 
312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Compute a hash code for this BreakIterator 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return A hash code 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::hashCode(void) const { 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t hash = 0; 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData != NULL) { 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) hash = fData->hashCode(); 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return hash; 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) { 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fText = 
utext_clone(fText, ut, FALSE, TRUE, &status); 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Set up a dummy CharacterIterator to be returned if anyone 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // calls getText(). With input from UText, there is no reasonable 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // way to return a characterIterator over the actual input text. 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Return one over an empty string instead - this is the closest 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we can come to signaling a failure. 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (GetText() is obsolete, this failure is sort of OK) 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fDCharIter == NULL) { 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const UChar c = 0; 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDCharIter = new UCharCharacterIterator(&c, 0); 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fDCharIter == NULL) { 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // existing fCharIter was adopted from the outside. Delete it now. 
355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCharIter; 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCharIter = fDCharIter; 358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) this->first(); 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const { 364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UText *result = utext_clone(fillIn, fText, FALSE, TRUE, &status); 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the description used to create this iterator 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const UnicodeString& 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::getRules() const { 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData != NULL) { 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fData->getRuleSourceString(); 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const 
UnicodeString *s; 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (s == NULL) { 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: something more elegant here. 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // perhaps API should return the string by value. 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Note: thread unsafe init & leak are semi-ok, better than 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // what was before. Sould be cleaned up, though. 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s = new UnicodeString; 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *s; 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//======================================================================= 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// BreakIterator overrides 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//======================================================================= 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return a CharacterIterator over the text being analyzed. 
396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)CharacterIterator& 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::getText() const { 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *fCharIter; 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set the iterator to analyze a new piece of text. This function resets 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the current iteration position to the beginning of the text. 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param newText An iterator over the text to analyze. 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void 408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::adoptText(CharacterIterator* newText) { 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If we are holding a CharacterIterator adopted from a 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // previous call to this function, delete it now. 
411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCharIter; 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCharIter = newText; 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (newText==NULL || newText->startIndex() != 0) { 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // startIndex !=0 wants to be an error, but there's no way to report it. 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Make the iterator text be an empty string. 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fText = utext_openUChars(fText, NULL, 0, &status); 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fText = utext_openCharacterIterator(fText, newText, &status); 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) this->first(); 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set the iterator to analyze a new piece of text. This function resets 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the current iteration position to the beginning of the text. 
431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param newText An iterator over the text to analyze. 432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::setText(const UnicodeString& newText) { 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fText = utext_openConstUnicodeString(fText, &newText, &status); 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Set up a character iterator on the string. 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Needed in case someone calls getText(). 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Can not, unfortunately, do this lazily on the (probably never) 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // call to getText(), because getText is const. 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSCharIter == NULL) { 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSCharIter = new StringCharacterIterator(newText); 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSCharIter->setText(newText); 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCharIter!=fSCharIter && fCharIter!=fDCharIter) { 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // old fCharIter was adopted from the outside. Delete it. 
451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCharIter; 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCharIter = fSCharIter; 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) this->first(); 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sets the current iteration position to the beginning of the text. 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The offset of the beginning of the text. 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::first(void) { 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = 0; 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = TRUE; 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //if (fText == NULL) 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // return BreakIterator::DONE; 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, 0); 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles)/** 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sets the current iteration position to the end of the text. 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The text's past-the-end offset. 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::last(void) { 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fText == NULL) { 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = 0; 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = TRUE; 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return BreakIterator::DONE; 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = FALSE; 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t pos = (int32_t)utext_nativeLength(fText); 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, pos); 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return pos; 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Advances the iterator either forward or backward the specified number of steps. 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Negative values move backward, and positive values move forward. This is 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * equivalent to repeatedly calling next() or previous(). 
497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param n The number of steps to move. The sign indicates the direction 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (negative is backwards, and positive is forwards). 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The character offset of the boundary position n boundaries away from 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the current one. 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::next(int32_t n) { 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = current(); 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (n > 0) { 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = next(); 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --n; 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (n < 0) { 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = previous(); 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++n; 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Advances the iterator to the next boundary position. 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The position of the first boundary after this one. 
518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::next(void) { 520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if we have cached break positions and we're still in the range 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // covered by them, just move one step forward in the cache 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCachedBreakPositions != NULL) { 523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fPositionInCache < fNumCachedBreakPositions - 1) { 524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++fPositionInCache; 525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t pos = fCachedBreakPositions[fPositionInCache]; 526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, pos); 527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return pos; 528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t startPos = current(); 535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = handleNext(fData->fForwardTable); 536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fDictionaryCharCount > 0) { 537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = checkDictionary(startPos, result, FALSE); 538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) return result; 540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Advances the iterator backwards, to the last boundary preceding this one. 544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The position of the last boundary position preceding this one. 545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::previous(void) { 547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result; 548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t startPos; 549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if we have cached break positions and we're still in the range 551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // covered by them, just move one step backward in the cache 552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCachedBreakPositions != NULL) { 553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fPositionInCache > 0) { 554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --fPositionInCache; 555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If we're at the beginning of the cache, need to reevaluate the 556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // rule status 557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fPositionInCache <= 0) { 558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = FALSE; 559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t pos = 
fCachedBreakPositions[fPositionInCache]; 561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, pos); 562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return pos; 563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if we're already sitting at the beginning of the text, return DONE 570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fText == NULL || (startPos = current()) == 0) { 571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = 0; 572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = TRUE; 573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return BreakIterator::DONE; 574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData->fSafeRevTable != NULL || fData->fSafeFwdTable != NULL) { 577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = handlePrevious(fData->fReverseTable); 578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fDictionaryCharCount > 0) { 579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = checkDictionary(result, startPos, TRUE); 580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // old rule syntax 585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // set things up. handlePrevious() will back us up to some valid 586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // break position before the current position (we back our internal 587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // iterator up one step to prevent handlePrevious() from returning 588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the current position), but not necessarily the last one before 589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // where we started 591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t start = current(); 593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_PREVIOUS32(fText); 595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lastResult = handlePrevious(fData->fReverseTable); 596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (lastResult == UBRK_DONE) { 597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lastResult = 0; 598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, 0); 599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = lastResult; 601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lastTag = 0; 602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool breakTagValid = FALSE; 603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
// iterate forward from the known break position until we pass our 605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // starting point. The last break position before the starting 606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // point is our return value 607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = next(); 610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (result == BreakIterator::DONE || result >= start) { 611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lastResult = result; 614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lastTag = fLastRuleStatusIndex; 615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) breakTagValid = TRUE; 616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // fLastBreakTag wants to have the value for section of text preceding 619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the result position that we are to return (in lastResult.) If 620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the backwards rules overshot and the above loop had to do two or more 621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // next()s to move up to the desired return position, we will have a valid 622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // tag value. 
But, if handlePrevious() took us to exactly the correct result positon, 623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we wont have a tag value for that position, which is only set by handleNext(). 624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // set the current iteration position to be the last break position 626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // before where we started, and then return that value 627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, lastResult); 628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = lastTag; // for use by getRuleStatus() 629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = breakTagValid; 630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No need to check the dictionary; it will have been handled by 632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // next() 633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return lastResult; 635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sets the iterator to refer to the first boundary position following 639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the specified position. 640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @offset The position from which to begin searching for a break position. 641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The position of the first break after the current position. 
642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::following(int32_t offset) { 644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if we have cached break positions and offset is in the range 645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // covered by them, use them 646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: could use binary search 647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: what if offset is outside range, but break is not? 648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCachedBreakPositions != NULL) { 649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset >= fCachedBreakPositions[0] 650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && offset < fCachedBreakPositions[fNumCachedBreakPositions - 1]) { 651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPositionInCache = 0; 652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We are guaranteed not to leave the array due to range test above 653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (offset >= fCachedBreakPositions[fPositionInCache]) { 654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++fPositionInCache; 655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t pos = fCachedBreakPositions[fPositionInCache]; 657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, pos); 658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return pos; 659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 
662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if the offset passed in is already past the end of the text, 666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // just return DONE; if it's before the beginning, return the 667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // text's starting offset 668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = 0; 669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = TRUE; 670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fText == NULL || offset >= utext_nativeLength(fText)) { 671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) last(); 672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return next(); 673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else if (offset < 0) { 675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return first(); 676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // otherwise, set our internal iteration position (temporarily) 679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // to the position passed in. 
If this is the _beginning_ position, 680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // then we can just use next() to get our return value 681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = 0; 683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData->fSafeRevTable != NULL) { 685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // new rule syntax 686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, offset); 687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // move forward one codepoint to prepare for moving back to a 688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // safe point. 689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // this handles offset being between a supplementary character 690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_NEXT32(fText); 691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // handlePrevious will move most of the time to < 1 boundary away 692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) handlePrevious(fData->fSafeRevTable); 693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = next(); 694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (result <= offset) { 695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = next(); 696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData->fSafeFwdTable != NULL) { 700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // backup plan if forward safe table is 
not available 701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, offset); 702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_PREVIOUS32(fText); 703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // handle next will give result >= offset 704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) handleNext(fData->fSafeFwdTable); 705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // previous will give result 0 or 1 boundary away from offset, 706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // most of the time 707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we have to 708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t oldresult = previous(); 709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (oldresult > offset) { 710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = previous(); 711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (result <= offset) { 712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return oldresult; 713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) oldresult = result; 715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = next(); 717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (result <= offset) { 718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return next(); 719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // otherwise, we have to sync up first. 
Use handlePrevious() to back 723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // up to a known break position before the specified position (if 724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we can determine that the specified position is a break position, 725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we don't back up at all). This may or may not be the last break 726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // position at or before our starting position. Advance forward 727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // from here until we've passed the starting position. The position 728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we stop on will be the first break position after the specified one. 729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // old rule syntax 730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, offset); 732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset==0 || 733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (offset==1 && utext_getNativeIndex(fText)==0)) { 734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return next(); 735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = previous(); 737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (result != BreakIterator::DONE && result <= offset) { 739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = next(); 740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 
743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sets the iterator to refer to the last boundary position before the 747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * specified position. 748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @offset The position to begin searching for a break from. 749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The position of the last boundary before the starting position. 750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::preceding(int32_t offset) { 752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if we have cached break positions and offset is in the range 753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // covered by them, use them 754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCachedBreakPositions != NULL) { 755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: binary search? 756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: What if offset is outside range, but break is not? 
757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset > fCachedBreakPositions[0] 758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && offset <= fCachedBreakPositions[fNumCachedBreakPositions - 1]) { 759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPositionInCache = 0; 760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (fPositionInCache < fNumCachedBreakPositions 761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && offset > fCachedBreakPositions[fPositionInCache]) 762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++fPositionInCache; 763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --fPositionInCache; 764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If we're at the beginning of the cache, need to reevaluate the 765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // rule status 766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fPositionInCache <= 0) { 767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = FALSE; 768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, fCachedBreakPositions[fPositionInCache]); 770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fCachedBreakPositions[fPositionInCache]; 771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if the offset passed in is already past the end of the text, 
778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // just return DONE; if it's before the beginning, return the 779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // text's starting offset 780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fText == NULL || offset > utext_nativeLength(fText)) { 781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // return BreakIterator::DONE; 782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return last(); 783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else if (offset < 0) { 785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return first(); 786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if we start by updating the current iteration position to the 789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // position specified by the caller, we can just use previous() 790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // to carry out this operation 791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData->fSafeFwdTable != NULL) { 793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // new rule syntax 794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, offset); 795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t newOffset = (int32_t)UTEXT_GETNATIVEINDEX(fText); 796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (newOffset != offset) { 797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Will come here if specified offset was not a code point boundary AND 798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) // the underlying implmentation is using UText, which snaps any non-code-point-boundary 799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // indices to the containing code point. 800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // For breakitereator::preceding only, these non-code-point indices need to be moved 801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // up to refer to the following codepoint. 802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_NEXT32(fText); 803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offset = (int32_t)UTEXT_GETNATIVEINDEX(fText); 804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: (synwee) would it be better to just check for being in the middle of a surrogate pair, 807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // rather than adjusting the position unconditionally? 808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (Change would interact with safe rules.) 809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: change RBBI behavior for off-boundary indices to match that of UText? 810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // affects only preceding(), seems cleaner, but is slightly different. 
811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_PREVIOUS32(fText); 812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) handleNext(fData->fSafeFwdTable); 813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = (int32_t)UTEXT_GETNATIVEINDEX(fText); 814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (result >= offset) { 815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = previous(); 816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData->fSafeRevTable != NULL) { 820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // backup plan if forward safe table is not available 821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: check whether this path can be discarded 822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // It's probably OK to say that rules must supply both safe tables 823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if they use safe tables at all. We have certainly never described 824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // to anyone how to work with just one safe table. 
825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, offset); 826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_NEXT32(fText); 827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // handle previous will give result <= offset 829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) handlePrevious(fData->fSafeRevTable); 830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // next will give result 0 or 1 boundary away from offset, 832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // most of the time 833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we have to 834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t oldresult = next(); 835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (oldresult < offset) { 836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = next(); 837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (result >= offset) { 838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return oldresult; 839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) oldresult = result; 841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = previous(); 843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (result >= offset) { 844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return previous(); 845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // old rule syntax 850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_setNativeIndex(fText, offset); 851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return previous(); 852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns true if the specfied position is a boundary position. As a side 856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * effect, leaves the iterator pointing to the first boundary position at 857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or after "offset". 858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param offset the offset to check. 859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return True if "offset" is a boundary position. 860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool RuleBasedBreakIterator::isBoundary(int32_t offset) { 862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the beginning index of the iterator is always a boundary position by definition 863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset == 0) { 864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) first(); // For side effects on current position, tag values. 
865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset == (int32_t)utext_nativeLength(fText)) { 869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) last(); // For side effects on current position, tag values. 870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // out-of-range indexes are never boundary positions 874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset < 0) { 875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) first(); // For side effects on current position, tag values. 876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (offset > utext_nativeLength(fText)) { 880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) last(); // For side effects on current position, tag values. 
881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // otherwise, we can use following() on the position before the specified 885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // one and return true if the position we get back is the one the user 886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // specified 887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_previous32From(fText, offset); 888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t backOne = (int32_t)UTEXT_GETNATIVEINDEX(fText); 889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool result = following(backOne) == offset; 890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns the current iteration position. 895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The current iteration position. 
896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::current(void) const { 898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(fText); 899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return pos; 900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//======================================================================= 903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// implementation 904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//======================================================================= 905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// RBBIRunMode - the state machine runs an extra iteration at the beginning and end 908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// of user text. A variable with this enum type keeps track of where we 909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// are. The state machine only fetches user input while in the RUN mode. 910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum RBBIRunMode { 912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBI_START, // state machine processing is before first char of input 913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBI_RUN, // state machine processing is in the user text 914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBI_END // state machine processing is after end of user text. 
915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------------- 919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// handleNext(stateTable) 921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// This method is the actual implementation of the rbbi next() method. 922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// This method initializes the state machine to state 1 923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// and advances through the text character by character until we reach the end 924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// of the text or the state machine transitions to state 0. We update our return 925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// value every time the state machine passes through an accepting state. 
926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------------- 928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) { 929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t state; 930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int16_t category = 0; 931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIRunMode mode; 932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIStateTableRow *row; 934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lookaheadStatus = 0; 936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lookaheadTagIdx = 0; 937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = 0; 938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t initialPosition = 0; 939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lookaheadResult = 0; 940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool lookAheadHardBreak = (statetable->fFlags & RBBI_LOOKAHEAD_HARD_BREAK) != 0; 941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *tableData = statetable->fTableData; 942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t tableRowLen = statetable->fRowLen; 943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #ifdef RBBI_DEBUG 945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fTrace) { 946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPuts("Handle Next pos char 
state category"); 947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #endif 949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No matter what, handleNext alway correctly sets the break tag value. 951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = TRUE; 952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = 0; 953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if we're already at the end of the text, return DONE. 955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText); 956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = initialPosition; 957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = UTEXT_NEXT32(fText); 958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData == NULL || c==U_SENTINEL) { 959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return BreakIterator::DONE; 960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Set the initial state for the state machine 963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state = START_STATE; 964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) row = (RBBIStateTableRow *) 965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //(statetable->fTableData + (statetable->fRowLen * state)); 966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (tableData + tableRowLen * state); 967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) 969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mode = RBBI_RUN; 970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (statetable->fFlags & RBBI_BOF_REQUIRED) { 971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) category = 2; 972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mode = RBBI_START; 973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // loop until we reach the end of the text or transition to state 0 977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == U_SENTINEL) { 980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Reached end of input string. 981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mode == RBBI_END) { 982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We have already run the loop one last time with the 983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // character set to the psueudo {eof} value. Now it is time 984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // to unconditionally bail out. 985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (lookaheadResult > result) { 986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We ran off the end of the string with a pending look-ahead match. 987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Treat this as if the look-ahead condition had been met, and return 988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the match at the / position from the look-ahead rule. 
989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = lookaheadResult; 990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = lookaheadTagIdx; 991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lookaheadStatus = 0; 992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Run the loop one last time with the fake end-of-input character category. 996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mode = RBBI_END; 997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) category = 1; 998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Get the char category. An incoming category of 1 or 2 means that 1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we are preset for doing the beginning or end of input, and 1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // that we shouldn't get a category from an actual text input character. 1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mode == RBBI_RUN) { 1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // look up the current character's character category, which tells us 1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // which column in the state table to look at. 
1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Note: the 16 in UTRIE_GET16 refers to the size of the data being returned, 1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // not the size of the character going in, which is a UChar32. 1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTRIE_GET16(&fData->fTrie, c, category); 1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check the dictionary bit in the character's category. 1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Counter is only used by dictionary based iterators (subclasses). 1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Chars that need to be handled by a dictionary have a flag bit set 1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // in their category values. 1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((category & 0x4000) != 0) { 1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDictionaryCharCount++; 1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // And off the dictionary flag bit. 
1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) category &= ~0x4000; 1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #ifdef RBBI_DEBUG 1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fTrace) { 1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText)); 1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (0x20<=c && c<0x7f) { 1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPrintf("\"%c\" ", c); 1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPrintf("%5x ", c); 1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPrintf("%3d %3d\n", state, category); 1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #endif 1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // State Transition - move machine to its next state 1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state = row->fNextState[category]; 1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) row = (RBBIStateTableRow *) 1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (statetable->fTableData + (statetable->fRowLen * state)); 1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (tableData + tableRowLen * state); 1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (row->fAccepting == -1) { 1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Match found, common case. 1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mode != RBBI_START) { 1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = (int32_t)UTEXT_GETNATIVEINDEX(fText); 1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values. 1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (row->fLookAhead != 0) { 1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (lookaheadStatus != 0 1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && row->fAccepting == lookaheadStatus) { 1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Lookahead match is completed. 1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = lookaheadResult; 1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = lookaheadTagIdx; 1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lookaheadStatus = 0; 1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: make a standalone hard break in a rule work. 
1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (lookAheadHardBreak) { 1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_SETNATIVEINDEX(fText, result); 1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Look-ahead completed, but other rules may match further. Continue on 1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: junk this feature? I don't think it's used anywhwere. 1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto continueOn; 1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t r = (int32_t)UTEXT_GETNATIVEINDEX(fText); 1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lookaheadResult = r; 1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lookaheadStatus = row->fLookAhead; 1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lookaheadTagIdx = row->fTagIdx; 1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto continueOn; 1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (row->fAccepting != 0) { 1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Because this is an accepting state, any in-progress look-ahead match 1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // is no longer relavant. Clear out the pending lookahead status. 
1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lookaheadStatus = 0; // clear out any pending look-ahead match. 1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)continueOn: 1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (state == STOP_STATE) { 1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This is the normal exit from the lookup state machine. 1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We have advanced through the string until it is certain that no 1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // longer match is possible, no matter what characters follow. 1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Advance to the next character. 1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If this is a beginning-of-input loop iteration, don't advance 1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the input position. The next iteration will be processing the 1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // first real input character. 
1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mode == RBBI_RUN) { 1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = UTEXT_NEXT32(fText); 1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mode == RBBI_START) { 1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mode = RBBI_RUN; 1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The state machine is done. Check whether it found a match... 1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If the iterator failed to advance in the match engine, force it ahead by one. 1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (This really indicates a defect in the break rules. They should always match 1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // at least one character.) 
1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (result == initialPosition) { 1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_SETNATIVEINDEX(fText, initialPosition); 1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_NEXT32(fText); 1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = (int32_t)UTEXT_GETNATIVEINDEX(fText); 1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Leave the iterator at our result position. 1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_SETNATIVEINDEX(fText, result); 1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #ifdef RBBI_DEBUG 1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fTrace) { 1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPrintf("result = %d\n\n", result); 1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #endif 1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------------- 1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// handlePrevious() 1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Iterate backwards, according to the 
logic of the reverse rules. 1135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// This version handles the exact style backwards rules. 1136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The logic of this function is very similar to handleNext(), above. 1138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------------- 1140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable) { 1141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t state; 1142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int16_t category = 0; 1143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIRunMode mode; 1144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIStateTableRow *row; 1145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 1146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lookaheadStatus = 0; 1147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t result = 0; 1148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t initialPosition = 0; 1149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lookaheadResult = 0; 1150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool lookAheadHardBreak = (statetable->fFlags & RBBI_LOOKAHEAD_HARD_BREAK) != 0; 1151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #ifdef RBBI_DEBUG 1153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fTrace) { 1154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPuts("Handle Previous pos char 
state category"); 1155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #endif 1157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // handlePrevious() never gets the rule status. 1159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Flag the status as invalid; if the user ever asks for status, we will need 1160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // to back up, then re-find the break position using handleNext(), which does 1161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // get the status value. 1162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = FALSE; 1163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = 0; 1164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if we're already at the start of the text, return DONE. 1166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fText == NULL || fData == NULL || UTEXT_GETNATIVEINDEX(fText)==0) { 1167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return BreakIterator::DONE; 1168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Set up the starting char. 
1171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) initialPosition = (int32_t)UTEXT_GETNATIVEINDEX(fText); 1172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = initialPosition; 1173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = UTEXT_PREVIOUS32(fText); 1174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Set the initial state for the state machine 1176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state = START_STATE; 1177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) row = (RBBIStateTableRow *) 1178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (statetable->fTableData + (statetable->fRowLen * state)); 1179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) category = 3; 1180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mode = RBBI_RUN; 1181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (statetable->fFlags & RBBI_BOF_REQUIRED) { 1182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) category = 2; 1183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mode = RBBI_START; 1184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // loop until we reach the start of the text or transition to state 0 1188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 1190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == U_SENTINEL) { 1191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Reached end of input string. 
1192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mode == RBBI_END) { 1193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We have already run the loop one last time with the 1194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // character set to the psueudo {eof} value. Now it is time 1195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // to unconditionally bail out. 1196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (lookaheadResult < result) { 1197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We ran off the end of the string with a pending look-ahead match. 1198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Treat this as if the look-ahead condition had been met, and return 1199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the match at the / position from the look-ahead rule. 1200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = lookaheadResult; 1201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lookaheadStatus = 0; 1202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (result == initialPosition) { 1203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Ran off start, no match found. 1204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // move one index one (towards the start, since we are doing a previous()) 1205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_SETNATIVEINDEX(fText, initialPosition); 1206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_PREVIOUS32(fText); // TODO: shouldn't be necessary. We're already at beginning. Check. 
1207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Run the loop one last time with the fake end-of-input character category. 1211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mode = RBBI_END; 1212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) category = 1; 1213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Get the char category. An incoming category of 1 or 2 means that 1217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we are preset for doing the beginning or end of input, and 1218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // that we shouldn't get a category from an actual text input character. 1219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mode == RBBI_RUN) { 1221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // look up the current character's character category, which tells us 1222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // which column in the state table to look at. 1223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Note: the 16 in UTRIE_GET16 refers to the size of the data being returned, 1224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // not the size of the character going in, which is a UChar32. 
1225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTRIE_GET16(&fData->fTrie, c, category); 1227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check the dictionary bit in the character's category. 1229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Counter is only used by dictionary based iterators (subclasses). 1230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Chars that need to be handled by a dictionary have a flag bit set 1231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // in their category values. 1232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((category & 0x4000) != 0) { 1234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDictionaryCharCount++; 1235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // And off the dictionary flag bit. 
1236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) category &= ~0x4000; 1237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #ifdef RBBI_DEBUG 1241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fTrace) { 1242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(fText)); 1243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (0x20<=c && c<0x7f) { 1244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPrintf("\"%c\" ", c); 1245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPrintf("%5x ", c); 1247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPrintf("%3d %3d\n", state, category); 1249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #endif 1251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // State Transition - move machine to its next state 1253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) state = row->fNextState[category]; 1255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) row = (RBBIStateTableRow *) 1256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (statetable->fTableData + (statetable->fRowLen * state)); 1257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (row->fAccepting == -1) { 
1259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Match found, common case. 1260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = (int32_t)UTEXT_GETNATIVEINDEX(fText); 1261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (row->fLookAhead != 0) { 1264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (lookaheadStatus != 0 1265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && row->fAccepting == lookaheadStatus) { 1266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Lookahead match is completed. 1267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = lookaheadResult; 1268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lookaheadStatus = 0; 1269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: make a standalone hard break in a rule work. 1270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (lookAheadHardBreak) { 1271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_SETNATIVEINDEX(fText, result); 1272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 1273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Look-ahead completed, but other rules may match further. Continue on 1275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: junk this feature? I don't think it's used anywhwere. 
1276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto continueOn; 1277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t r = (int32_t)UTEXT_GETNATIVEINDEX(fText); 1280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lookaheadResult = r; 1281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lookaheadStatus = row->fLookAhead; 1282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto continueOn; 1283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (row->fAccepting != 0) { 1287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Because this is an accepting state, any in-progress look-ahead match 1288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // is no longer relavant. Clear out the pending lookahead status. 1289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lookaheadStatus = 0; 1290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)continueOn: 1293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (state == STOP_STATE) { 1294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This is the normal exit from the lookup state machine. 1295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We have advanced through the string until it is certain that no 1296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // longer match is possible, no matter what characters follow. 
1297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Move (backwards) to the next character to process. 1301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If this is a beginning-of-input loop iteration, don't advance 1302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the input position. The next iteration will be processing the 1303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // first real input character. 1304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mode == RBBI_RUN) { 1305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = UTEXT_PREVIOUS32(fText); 1306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (mode == RBBI_START) { 1308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mode = RBBI_RUN; 1309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The state machine is done. Check whether it found a match... 1314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If the iterator failed to advance in the match engine, force it ahead by one. 1316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (This really indicates a defect in the break rules. They should always match 1317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // at least one character.) 
1318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (result == initialPosition) { 1319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_SETNATIVEINDEX(fText, initialPosition); 1320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_PREVIOUS32(fText); 1321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = (int32_t)UTEXT_GETNATIVEINDEX(fText); 1322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Leave the iterator at our result position. 1325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_SETNATIVEINDEX(fText, result); 1326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #ifdef RBBI_DEBUG 1327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fTrace) { 1328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RBBIDebugPrintf("result = %d\n\n", result); 1329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) #endif 1331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 1332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void 1336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::reset() 1337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCachedBreakPositions) { 1339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(fCachedBreakPositions); 1340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
1341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCachedBreakPositions = NULL; 1342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNumCachedBreakPositions = 0; 1343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDictionaryCharCount = 0; 1344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPositionInCache = 0; 1345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 1350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// getRuleStatus() Return the break rule tag associated with the current 1352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// iterator position. If the iterator arrived at its current 1353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// position by iterating forwards, the value will have been 1354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// cached by the handleNext() function. 1355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// If no cached status value is available, the status is 1357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// found by doing a previous() followed by a next(), which 1358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// leaves the iterator where it started, and computes the 1359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// status while doing the next(). 
1360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 1362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RuleBasedBreakIterator::makeRuleStatusValid() { 1363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fLastStatusIndexValid == FALSE) { 1364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No cached status is available. 1365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fText == NULL || current() == 0) { 1366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // At start of text, or there is no text. Status is always zero. 1367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastRuleStatusIndex = 0; 1368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLastStatusIndexValid = TRUE; 1369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Not at start of text. Find status the tedious way. 
1371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t pa = current(); 1372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) previous(); 1373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fNumCachedBreakPositions > 0) { 1374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); // Blow off the dictionary cache 1375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t pb = next(); 1377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pa != pb) { 1378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // note: the if (pa != pb) test is here only to eliminate warnings for 1379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // unused local variables on gcc. Logically, it isn't needed. 1380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(pa == pb); 1381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(fLastRuleStatusIndex >= 0 && fLastRuleStatusIndex < fData->fStatusMaxIdx); 1385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::getRuleStatus() const { 1389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator *nonConstThis = (RuleBasedBreakIterator *)this; 1390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nonConstThis->makeRuleStatusValid(); 1391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) // fLastRuleStatusIndex indexes to the start of the appropriate status record 1393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (the number of status values.) 1394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This function returns the last (largest) of the array of status values. 1395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t idx = fLastRuleStatusIndex + fData->fRuleStatusTable[fLastRuleStatusIndex]; 1396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t tagVal = fData->fRuleStatusTable[idx]; 1397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return tagVal; 1399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RuleBasedBreakIterator::getRuleStatusVec( 1405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *fillInVec, int32_t capacity, UErrorCode &status) 1406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator *nonConstThis = (RuleBasedBreakIterator *)this; 1412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nonConstThis->makeRuleStatusValid(); 1413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t 
numVals = fData->fRuleStatusTable[fLastRuleStatusIndex]; 1414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t numValsToCopy = numVals; 1415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (numVals > capacity) { 1416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_BUFFER_OVERFLOW_ERROR; 1417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) numValsToCopy = capacity; 1418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 1420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i<numValsToCopy; i++) { 1421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fillInVec[i] = fData->fRuleStatusTable[fLastRuleStatusIndex + i + 1]; 1422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return numVals; 1424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 1429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// getBinaryRules Access to the compiled form of the rules, 1431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// for use by build system tools that save the data 1432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// for standard iterator types. 
1433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 1435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const uint8_t *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) { 1436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint8_t *retPtr = NULL; 1437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length = 0; 1438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData != NULL) { 1440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) retPtr = (const uint8_t *)fData->fHeader; 1441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length = fData->fHeader->fLength; 1442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return retPtr; 1444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 1450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// BufferClone TODO: In my (Andy) opinion, this function should be deprecated. 1452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Saving one heap allocation isn't worth the trouble. 
1453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Cloning shouldn't be done in tight loops, and 1454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// making the clone copy involves other heap operations anyway. 1455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// And the application code for correctly dealing with buffer 1456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// size problems and the eventual object destruction is ugly. 1457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 1459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)BreakIterator * RuleBasedBreakIterator::createBufferClone(void *stackBuffer, 1460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t &bufferSize, 1461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) 1462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)){ 1464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 1465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If user buffer size is zero this is a preflight operation to 1469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // obtain the needed buffer size, allowing for worst case misalignment. 
1470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (bufferSize == 0) { 1472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bufferSize = sizeof(RuleBasedBreakIterator) + U_ALIGNMENT_OFFSET_UP(0); 1473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 1474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check the alignment and size of the user supplied buffer. 1479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Allocate heap memory if the user supplied memory is insufficient. 1480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char *buf = (char *)stackBuffer; 1482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t s = bufferSize; 1483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (stackBuffer == NULL) { 1485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s = 0; // Ignore size, force allocation if user didn't give us a buffer. 
1486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { 1488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t offsetUp = (uint32_t)U_ALIGNMENT_OFFSET_UP(buf); 1489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s -= offsetUp; 1490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buf += offsetUp; 1491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (s < sizeof(RuleBasedBreakIterator)) { 1493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Not enough room in the caller-supplied buffer. 1494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Do a plain-vanilla heap based clone and return that, along with 1495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // a warning that the clone was allocated. 1496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator *clonedBI = new RuleBasedBreakIterator(*this); 1497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (clonedBI == 0) { 1498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 1499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_SAFECLONE_ALLOCATED_WARNING; 1501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return clonedBI; 1503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Clone the source BI into the caller-supplied buffer. 
1507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 1508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RuleBasedBreakIterator *clone = new(buf) RuleBasedBreakIterator(*this); 1509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) clone->fBufferClone = TRUE; // Flag to prevent deleting storage on close (From C code) 1510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return clone; 1512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 1516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// isDictionaryChar Return true if the category lookup for this char 1518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// indicates that it is in the set of dictionary lookup 1519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// chars. 1520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// This function is intended for use by dictionary based 1522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// break iterators. 
1523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 1525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*UBool RuleBasedBreakIterator::isDictionaryChar(UChar32 c) { 1526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fData == NULL) { 1527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 1528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint16_t category; 1530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTRIE_GET16(&fData->fTrie, c, category); 1531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (category & 0x4000) != 0; 1532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}*/ 1533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 1536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// checkDictionary This function handles all processing of characters in 1538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// the "dictionary" set. It will determine the appropriate 1539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// course of action, and possibly set up a cache in the 1540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// process. 
//
//      startPos, endPos   Native text indexes delimiting the span between two
//                         rule-based boundaries; startPos is the lower one.
//      reverse            TRUE when iterating backwards.
//
//      Returns the boundary to use.  When no dictionary breaks are produced
//      (span of at most one index, UTF-8 UText, no breaks found, or the cache
//      allocation failed) that is startPos when reverse is TRUE and endPos
//      otherwise.  When breaks are found, they are stored in
//      fCachedBreakPositions and the result of preceding(endPos) or
//      following(startPos) is returned.
//
//      Side effects: always calls reset() first; moves the fText index.
//
//-------------------------------------------------------------------------------
int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
                            int32_t endPos,
                            UBool reverse) {
    // Reset the old break cache first.
    reset();

    // note: code segment below assumes that dictionary chars are in the
    // startPos-endPos range
    // value returned should be next character in sequence
    if ((endPos - startPos) <= 1) {
        return (reverse ? startPos : endPos);
    }

    // Bug 5532.  The dictionary code will crash if the input text is UTF-8
    //      because native indexes are different from UTF-16 indexes.
    //      Temporary hack: skip dictionary lookup for UTF-8 encoded text.
    //      It won't give the right breaks, but it's better than a crash.
    //
    //      Check the type of the UText by checking its pFuncs field, which
    //      is UText's function dispatch table.  It will be the same for all
    //      UTF-8 UTexts and different for any other UText type.
    //
    //      We have no other type of UText available with non-UTF-16 native indexing.
    //      This whole check will go away once the dictionary code is fixed.
    //
    //      NOTE(review): this function-local static is filled in lazily with no
    //      synchronization visible here -- confirm that is acceptable.
    static const void *utext_utf8Funcs;
    if (utext_utf8Funcs == NULL) {
        // Cache the UTF-8 UText function pointer value.
        UErrorCode status = U_ZERO_ERROR;
        UText tempUText = UTEXT_INITIALIZER;
        utext_openUTF8(&tempUText, NULL, 0, &status);
        utext_utf8Funcs = tempUText.pFuncs;
        utext_close(&tempUText);
    }
    if (fText->pFuncs == utext_utf8Funcs) {
        return (reverse ? startPos : endPos);
    }

    // Starting from the starting point, scan towards the proposed result,
    // looking for the first dictionary character (which may be the one
    // we're on, if we're starting in the middle of a range).
    utext_setNativeIndex(fText, reverse ? endPos : startPos);
    if (reverse) {
        // Step back so that the "current" character is the one just before endPos.
        UTEXT_PREVIOUS32(fText);
    }

    int32_t rangeStart = startPos;
    int32_t rangeEnd = endPos;

    uint16_t    category;           // Trie lookup result; bit 0x4000 marks a dictionary character.
    int32_t     current;            // Native index of the scan position in the main loop.
    UErrorCode  status = U_ZERO_ERROR;
    UStack      breaks(status);     // Break positions reported by the language break engines.
    int32_t     foundBreakCount = 0;
    UChar32     c = utext_current32(fText);

    UTRIE_GET16(&fData->fTrie, c, category);

    // Is the character we're starting on a dictionary character? If so, we
    // need to back up to include the entire run; otherwise the results of
    // the break algorithm will differ depending on where we start. Since
    // the result is cached and there is typically a non-dictionary break
    // within a small number of words, there should be little performance impact.
    if (category & 0x4000) {
        if (reverse) {
            // Extend rangeEnd forward to the end of the dictionary run (or end of text).
            do {
                utext_next32(fText);          // TODO:  recast to work directly with postincrement.
                c = utext_current32(fText);
                UTRIE_GET16(&fData->fTrie, c, category);
            } while (c != U_SENTINEL && (category & 0x4000));
            // Back up to the last dictionary character
            rangeEnd = (int32_t)UTEXT_GETNATIVEINDEX(fText);
            if (c == U_SENTINEL) {
                // c = fText->last32();
                //   TODO:  why was this if needed?
                c = UTEXT_PREVIOUS32(fText);
            }
            else {
                c = UTEXT_PREVIOUS32(fText);
            }
        }
        else {
            // Extend rangeStart backward to the start of the dictionary run (or start of text).
            do {
                c = UTEXT_PREVIOUS32(fText);
                UTRIE_GET16(&fData->fTrie, c, category);
            }
            while (c != U_SENTINEL && (category & 0x4000));
            // Back up to the last dictionary character
            if (c == U_SENTINEL) {
                // c = fText->first32();
                c = utext_current32(fText);
            }
            else {
                // Stopped on a non-dictionary character; move forward past it.
                utext_next32(fText);
                c = utext_current32(fText);
            }
            rangeStart = (int32_t)UTEXT_GETNATIVEINDEX(fText);;
        }
        UTRIE_GET16(&fData->fTrie, c, category);
    }

    // Loop through the text, looking for ranges of dictionary characters.
    // For each span, find the appropriate break engine, and ask it to find
    // any breaks within the span.
    // Note: we always do this in the forward direction, so that the break
    // cache is built in the right order.
    if (reverse) {
        utext_setNativeIndex(fText, rangeStart);
        c = utext_current32(fText);
        UTRIE_GET16(&fData->fTrie, c, category);
    }
    while(U_SUCCESS(status)) {
        // Skip forward over non-dictionary characters.
        while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) < rangeEnd && (category & 0x4000) == 0) {
            utext_next32(fText);           // TODO:  tweak for post-increment operation
            c = utext_current32(fText);
            UTRIE_GET16(&fData->fTrie, c, category);
        }
        if (current >= rangeEnd) {
            break;
        }

        // We now have a dictionary character. Get the appropriate language object
        // to deal with it.
        const LanguageBreakEngine *lbe = getLanguageBreakEngine(c);

        // Ask the language object if there are any breaks. It will leave the text
        // pointer on the other side of its range, ready to search for the next one.
        // NOTE(review): when lbe is NULL nothing in this iteration advances fText,
        // so the loop would not make progress -- confirm getLanguageBreakEngine()
        // cannot return NULL here.
        if (lbe != NULL) {
            foundBreakCount += lbe->findBreaks(fText, rangeStart, rangeEnd, FALSE, fBreakType, breaks);
        }

        // Reload the loop variables for the next go-round
        c = utext_current32(fText);
        UTRIE_GET16(&fData->fTrie, c, category);
    }

    // If we found breaks, build a new break cache. The first and last entries must
    // be the original starting and ending position.
    if (foundBreakCount > 0) {
        // Reserve room for startPos and/or endPos when they are not already
        // the first / last break found.
        int32_t totalBreaks = foundBreakCount;
        if (startPos < breaks.elementAti(0)) {
            totalBreaks += 1;
        }
        if (endPos > breaks.peeki()) {
            totalBreaks += 1;
        }
        fCachedBreakPositions = (int32_t *)uprv_malloc(totalBreaks * sizeof(int32_t));
        if (fCachedBreakPositions != NULL) {
            int32_t out = 0;
            fNumCachedBreakPositions = totalBreaks;
            if (startPos < breaks.elementAti(0)) {
                fCachedBreakPositions[out++] = startPos;
            }
            for (int32_t i = 0; i < foundBreakCount; ++i) {
                fCachedBreakPositions[out++] = breaks.elementAti(i);
            }
            if (endPos > fCachedBreakPositions[out-1]) {
                fCachedBreakPositions[out] = endPos;
            }
            // If there are breaks, then by definition, we are replacing the original
            // proposed break by one of the breaks we found. Use following() and
            // preceding() to do the work. They should never recurse in this case.
            if (reverse) {
                return preceding(endPos);
            }
            else {
                return following(startPos);
            }
        }
        // If the allocation failed, just fall through to the "no breaks found" case.
    }

    // If we get here, there were no language-based breaks. Set the text pointer
    // to the original proposed break.
    utext_setNativeIndex(fText, reverse ? startPos : endPos);
    return (reverse ? startPos : endPos);
}

U_NAMESPACE_END

// defined in ucln_cmn.h

// File-scope stack of language break factories; deleted and reset to NULL by
// breakiterator_cleanup_dict() below.
static U_NAMESPACE_QUALIFIER UStack *gLanguageBreakFactories = NULL;

/**
 * Release all static memory held by breakiterator.
 * Deletes gLanguageBreakFactories if it is set; always returns TRUE.
 */
U_CDECL_BEGIN
static UBool U_CALLCONV breakiterator_cleanup_dict(void) {
    if (gLanguageBreakFactories) {
        delete gLanguageBreakFactories;
        gLanguageBreakFactories = NULL;
    }
    return TRUE;
}
U_CDECL_END

U_CDECL_BEGIN
static void U_CALLCONV _deleteFactory(void *obj) {
1741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete (U_NAMESPACE_QUALIFIER LanguageBreakFactory *) obj; 1742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END 1744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN 1745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const LanguageBreakEngine* 1747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)getLanguageBreakEngineFromFactory(UChar32 c, int32_t breakType) 1748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool needsInit; 1750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UMTX_CHECK(NULL, (UBool)(gLanguageBreakFactories == NULL), needsInit); 1752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (needsInit) { 1754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UStack *factories = new UStack(_deleteFactory, NULL, status); 1755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (factories != NULL && U_SUCCESS(status)) { 1756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status); 1757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) factories->push(builtIn, status); 1758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef U_LOCAL_SERVICE_HOOK 1759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status); 1760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (extra != 
NULL) { 1761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) factories->push(extra, status); 1762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) umtx_lock(NULL); 1766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (gLanguageBreakFactories == NULL) { 1767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) gLanguageBreakFactories = factories; 1768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) factories = NULL; 1769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR_DICT, breakiterator_cleanup_dict); 1770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) umtx_unlock(NULL); 1772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete factories; 1773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (gLanguageBreakFactories == NULL) { 1776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 1777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i = gLanguageBreakFactories->size(); 1780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const LanguageBreakEngine *lbe = NULL; 1781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (--i >= 0) { 1782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i)); 
1783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lbe = factory->getEngineFor(c, breakType); 1784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (lbe != NULL) { 1785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return lbe; 1789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 1793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// getLanguageBreakEngine Find an appropriate LanguageBreakEngine for the 1795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// the characer c. 
1796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 1797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------------- 1798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const LanguageBreakEngine * 1799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { 1800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const LanguageBreakEngine *lbe = NULL; 1801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 1802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fLanguageBreakEngines == NULL) { 1804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLanguageBreakEngines = new UStack(status); 1805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fLanguageBreakEngines == NULL || U_FAILURE(status)) { 1806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fLanguageBreakEngines; 1807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLanguageBreakEngines = 0; 1808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 1809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i = fLanguageBreakEngines->size(); 1813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (--i >= 0) { 1814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i)); 1815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (lbe->handles(c, fBreakType)) { 1816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) return lbe; 1817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No existing dictionary took the character. See if a factory wants to 1821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // give us a new LanguageBreakEngine for this character. 1822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lbe = getLanguageBreakEngineFromFactory(c, fBreakType); 1823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If we got one, use it and push it on our stack. 1825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (lbe != NULL) { 1826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLanguageBreakEngines->push((void *)lbe, status); 1827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Even if we can't remember it, we can keep looking it up, so 1828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // return it even if the push fails. 1829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return lbe; 1830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No engine is forthcoming for this character. Add it to the 1833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // reject set. Create the reject break engine if needed. 
1834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fUnhandledBreakEngine == NULL) { 1835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fUnhandledBreakEngine = new UnhandledEngine(status); 1836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) { 1837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 1838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Put it last so that scripts for which we have an engine get tried 1840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // first. 1841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLanguageBreakEngines->insertElementAt(fUnhandledBreakEngine, 0, status); 1842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If we can't insert it, or creation failed, get rid of it 1843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 1844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fUnhandledBreakEngine; 1845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fUnhandledBreakEngine = 0; 1846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 1847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Tell the reject engine about the character; at its discretion, it may 1851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // add more than just the one character. 
1852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fUnhandledBreakEngine->handleCharacter(c, fBreakType); 1853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fUnhandledBreakEngine; 1855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*int32_t RuleBasedBreakIterator::getBreakType() const { 1860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fBreakType; 1861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}*/ 1862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RuleBasedBreakIterator::setBreakType(int32_t type) { 1864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fBreakType = type; 1865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 1866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END 1869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1871