1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*************************************************************************** 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 1999-2011 International Business Machines Corporation * 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and others. All rights reserved. * 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*************************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 10/22/99 alan Creation. 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11/11/99 rgillam Complete port from Java. 
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef RBBI_H 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define RBBI_H 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \file 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \brief C++ API: Rule Based Break Iterator 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/brkiter.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/udata.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/parseerr.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/schriter.h" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uchriter.h" 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct UTrie; 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** @internal */ 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct RBBIDataHeader; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass RuleBasedBreakIteratorTables; 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass BreakIterator; 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass RBBIDataWrapper; 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UStack; 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass LanguageBreakEngine; 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnhandledEngine; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct RBBIStateTable; 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A subclass of BreakIterator whose behavior is specified using a list of rules. 
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p>Instances of this class are most commonly created by the factory methods of 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc., 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and then used via the abstract API in class BreakIterator</p> 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p>See the ICU User Guide for information on Break Iterator Rules.</p> 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p>This class is not intended to be subclassed. (Class DictionaryBasedBreakIterator 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is a subclass, but that relationship is effectively internal to the ICU 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * implementation. 
The subclassing interface to RuleBasedBreakIterator is 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not part of the ICU API, and may not remain stable.)</p> 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass U_COMMON_API RuleBasedBreakIterator : public BreakIterator { 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected: 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UText through which this BreakIterator accesses the text 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UText *fText; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A character iterator that refers to the same text as the UText, above. 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Only included for compatibility with old API, which was based on CharacterIterators. 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. 
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CharacterIterator *fCharIter; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the input text is provided by a UnicodeString, this will point to 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a characterIterator that wraps that data. Needed only for the 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * implementation of getText(), a backwards compatibility issue. 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru StringCharacterIterator *fSCharIter; 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the input text is provided by a UText, this 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * dummy CharacterIterator over an empty string will 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be returned from getText() 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCharCharacterIterator *fDCharIter; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The rule data for this BreakIterator instance 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDataWrapper *fData; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** Index of the Rule {tag} values for the most recent match. 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fLastRuleStatusIndex; 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Rule tag value valid flag. 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Some iterator operations don't intrinsically set the correct tag value. 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This flag lets us lazily compute the value if we are ever asked for it. 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool fLastStatusIndexValid; 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Counter for the number of characters encountered with the "dictionary" 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * flag set. 
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fDictionaryCharCount; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When a range of characters is divided up using the dictionary, the break 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * positions that are discovered are stored here, preventing us from having 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to use either the dictionary or the state table again until the iterator 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * leaves this range of text. Has the most impact for line breaking. 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t* fCachedBreakPositions; 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The number of elements in fCachedBreakPositions 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fNumCachedBreakPositions; 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if fCachedBreakPositions is not null, this indicates which item in the 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * cache the current iteration position refers to 
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fPositionInCache; 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If present, UStack of LanguageBreakEngine objects that might handle 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * dictionary characters. Searched from top to bottom to find an object to 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * handle a given character. 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UStack *fLanguageBreakEngines; 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If present, the special LanguageBreakEngine used for handling 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * characters that are in the dictionary set, but not handled by any 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * LanguageBreakEngine. 
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnhandledEngine *fUnhandledBreakEngine; 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The type of the break iterator, or -1 if it has not been set. 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fBreakType; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected: 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // constructors 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 17485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Constant to be used in the constructor 17585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * RuleBasedBreakIterator(RBBIDataHeader*, EDontAdopt, UErrorCode &); 17685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * which does not adopt the memory indicated by the RBBIDataHeader* 17785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * parameter. 
17885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 17985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @internal 18085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 18185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho enum EDontAdopt { 18285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho kDontAdopt 18385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho }; 18485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 18585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /** 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructor from a flattened set of RBBI data in malloced memory. 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * RuleBasedBreakIterators built from a custom set of rules 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are created via this constructor; the rules are compiled 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * into memory, then the break iterator is constructed here. 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The break iterator adopts the memory, and will 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * free it when done. 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /** 19885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Constructor from a flattened set of RBBI data in memory which need not 19985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * be malloced (e.g. it may be a memory-mapped file, etc.). 
20085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 20185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * This version does not adopt the memory, and does not 20285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * free it when done. 20385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @internal 20485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 20585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status); 20685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru friend class RBBIRuleBuilder; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** @internal */ 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru friend class BreakIterator; 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** Default constructor. Creates an empty shell of an iterator, with no 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * rules or text to iterate over. Object can subsequently be assigned to. 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.2 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator(); 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 
Will produce a break iterator with the same behavior, 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and which iterates over the same text, as the one passed in. 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param that The RuleBasedBreakIterator passed to be copied 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator(const RuleBasedBreakIterator& that); 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Construct a RuleBasedBreakIterator from a set of rules supplied as a string. 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param rules The break rules to be used. 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param parseError In the event of a syntax error in the rules, provides the location 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * within the rules of the problem. 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Information on any errors encountered. 
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.2 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator( const UnicodeString &rules, 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError &parseError, 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode &status); 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Construct a RuleBasedBreakIterator from a set of precompiled binary rules. 247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules(). 248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Construction of a break iterator in this way is substantially faster than 249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * construction from source rules. 250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Ownership of the storage containing the compiled rules remains with the 252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * caller of this function. The compiled rules must not be modified or 253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * deleted during the life of the break iterator. 254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The compiled rules are not compatible across different major versions of ICU. 
256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The compiled rules are compatible only between machines with the same 257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * byte ordering (little or big endian) and the same base character set family 258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * (ASCII or EBCDIC). 259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @see #getBinaryRules 261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param compiledRules A pointer to the compiled break rules to be used. 262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param ruleLength The length of the compiled break rules, in bytes. This 263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * corresponds to the length value produced by getBinaryRules(). 264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param status Information on any errors encountered, including invalid 265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * binary rules. 266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @draft ICU 4.8 267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RuleBasedBreakIterator(const uint8_t *compiledRules, 269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint32_t ruleLength, 270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UErrorCode &status); 271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This constructor uses the udata interface to create a BreakIterator 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * whose internal tables live in a memory-mapped file. "image" is an 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ICU UDataMemory handle for the pre-compiled break iterator tables. 
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param image handle to the memory image for the break iterator data. 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Ownership of the UDataMemory handle passes to the Break Iterator, 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * which will be responsible for closing it when it is no longer needed. 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Information on any errors encountered. 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @see udata_open 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @see #getBinaryRules 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.8 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual ~RuleBasedBreakIterator(); 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Assignment operator. Sets this iterator to have the same behavior, 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and iterate over the same text, as the one passed in. 
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param that The RuleBasedBreakIterator passed in 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the newly created RuleBasedBreakIterator 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Equality operator. Returns TRUE if both BreakIterators are of the 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * same class, have the same behavior, and iterate over the same text. 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param that The BreakIterator to be compared for equality 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return TRUE if both BreakIterators are of the 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * same class, have the same behavior, and iterate over the same text. 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UBool operator==(const BreakIterator& that) const; 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Not-equal operator. If operator== returns TRUE, this returns FALSE, 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and vice versa. 
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param that The BreakIterator to be compared for inequality 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return TRUE if both BreakIterators are not the same. 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool operator!=(const BreakIterator& that) const; 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns a newly-constructed RuleBasedBreakIterator with the same 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * behavior, and iterating over the same text, as this one. 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Differs from the copy constructor in that it is polymorphic, and 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * will correctly clone (copy) a derived class. 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * clone() is thread safe. Multiple threads may simultaneously 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * clone the same source break iterator. 
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return a newly-constructed RuleBasedBreakIterator 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual BreakIterator* clone() const; 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Compute a hash code for this BreakIterator 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return A hash code 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t hashCode(void) const; 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns the description used to create this iterator 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the description used to create this iterator 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual const UnicodeString& getRules(void) const; 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // BreakIterator overrides 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return a CharacterIterator over the text being analyzed. 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The returned character iterator is owned by the break iterator, and must 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not be deleted by the caller. Repeated calls to this function may 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return the same CharacterIterator. 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * </p> 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The returned character iterator must not be used concurrently with 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the break iterator. If concurrent operation is needed, clone the 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returned character iterator first and operate on the clone. 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * </p> 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the break iterator is operating on text supplied via a UText, 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this function will fail. Lacking any way to signal failures, it 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returns a CharacterIterator containing no text. 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The function getUText() provides similar functionality, 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is reliable, and is more efficient. 
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * </p> 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TODO: deprecate this function? 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return An iterator over the text being analyzed. 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual CharacterIterator& getText(void) const; 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get a UText for the text being analyzed. 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The returned UText is a shallow clone of the UText used internally 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * by the break iterator implementation. It can safely be used to 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * access the text without impacting any break iterator operations, 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but the underlying text itself must not be altered. 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param fillIn A UText to be filled in. If NULL, a new UText will be 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * allocated to hold the result. 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status receives any error codes. 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The current UText for this break iterator. 
If an input 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UText was provided, it will always be returned. 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 3.4 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UText *getUText(UText *fillIn, UErrorCode &status) const; 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set the iterator to analyze a new piece of text. This function resets 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the current iteration position to the beginning of the text. 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param newText An iterator over the text to analyze. The BreakIterator 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * takes ownership of the character iterator. The caller MUST NOT delete it! 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void adoptText(CharacterIterator* newText); 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set the iterator to analyze a new piece of text. This function resets 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the current iteration position to the beginning of the text. 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param newText The text to analyze. 
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void setText(const UnicodeString& newText); 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Reset the break iterator to operate over the text represented by 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the UText. The iterator position is reset to the start. 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This function makes a shallow clone of the supplied UText. This means 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that the caller is free to immediately close or otherwise reuse the 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UText that was passed as a parameter, but that the underlying text itself 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * must not be altered while being referenced by the break iterator. 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param text The UText used to change the text. 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Receives any error codes. 
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 3.4 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void setText(UText *text, UErrorCode &status); 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the current iteration position to the beginning of the text. 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The offset of the beginning of the text. 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t first(void); 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the current iteration position to the end of the text. 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The text's past-the-end offset. 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t last(void); 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Advances the iterator either forward or backward the specified number of steps. 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Negative values move backward, and positive values move forward. This is 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * equivalent to repeatedly calling next() or previous(). 
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param n The number of steps to move. The sign indicates the direction 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (negative is backwards, and positive is forwards). 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The character offset of the boundary position n boundaries away from 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the current one. 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t next(int32_t n); 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Advances the iterator to the next boundary position. 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The position of the first boundary after this one. 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t next(void); 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Moves the iterator backwards, to the last boundary preceding this one. 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The position of the last boundary position preceding this one. 
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t previous(void); 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the iterator to refer to the first boundary position following 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the specified position. 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param offset The position from which to begin searching for a break position. 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The position of the first break after the current position. 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t following(int32_t offset); 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the iterator to refer to the last boundary position before the 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * specified position. 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param offset The position to begin searching for a break from. 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The position of the last boundary before the starting position. 
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t preceding(int32_t offset); 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns true if the specified position is a boundary position. As a side 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * effect, leaves the iterator pointing to the first boundary position at 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or after "offset". 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param offset the offset to check. 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return True if "offset" is a boundary position. 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UBool isBoundary(int32_t offset); 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns the current iteration position. 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The current iteration position. 
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t current(void) const; 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the status tag from the break rule that determined the most recently 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returned break position. For break rules that do not specify a 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * status, a default value of 0 is returned. If more than one break rule 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * would cause a boundary to be located at some position in the text, 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the numerically largest of the applicable status values is returned. 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Of the standard types of ICU break iterators, only word break and 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * line break provide status values. The values are defined in 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the header file ubrk.h. For Word breaks, the status allows distinguishing between words 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that contain alphabetic letters, "words" that appear to be numbers, 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * punctuation and spaces, words containing ideographic characters, and 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * more. 
For Line Break, the status distinguishes between hard (mandatory) breaks 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and soft (potential) break positions. 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <code>getRuleStatus()</code> can be called after obtaining a boundary 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * position from <code>next()</code>, <code>previous()</code>, or 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * any other break iterator function that returns a boundary position. 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When creating custom break rules, one is free to define whatever 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * status values may be convenient for the application. 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note: this function is not thread safe. It should not have been 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * declared const, and the const remains only for compatibility 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reasons. (The function is logically const, but not bit-wise const). 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the status from the break rule that determined the most recently 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returned break position. 
531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @see UWordBreak 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.2 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t getRuleStatus() const; 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the status (tag) values from the break rule(s) that determined the most 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * recently returned break position. 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The returned status value(s) are stored into an array provided by the caller. 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The values are stored in sorted (ascending) order. 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If the capacity of the output array is insufficient to hold the data, 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the output will be truncated to the available length, and a 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * U_BUFFER_OVERFLOW_ERROR will be signaled. 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param fillInVec an array to be filled in with the status values. 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param capacity the length of the supplied vector. 
A length of zero causes 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the function to return the number of status values, in the 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * normal way, without attempting to store any values. 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status receives error codes. 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The number of rule status values from rules that determined 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the most recent boundary returned by the break iterator. 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is the total number of status values that were available, 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not the reduced number that were actually returned. 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @see getRuleStatus 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 3.0 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This method is to implement a simple version of RTTI, since not all 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * C++ compilers support genuine RTTI. Polymorphic operator==() and 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * clone() methods call this method. 
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The class ID for this object. All objects of a 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * given class have the same class ID. Objects of 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * other classes have different class IDs. 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UClassID getDynamicClassID(void) const; 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns the class ID for this class. This is useful only for 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * comparing to a return value from getDynamicClassID(). For example: 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Base* polymorphic_pointer = createPolymorphicObject(); 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if (polymorphic_pointer->getDynamicClassID() == 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Derived::getStaticClassID()) ... 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The class ID for all objects of this class. 
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static UClassID U_EXPORT2 getStaticClassID(void); 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Create a clone (copy) of this break iterator in memory provided 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * by the caller. The idea is to increase performance by avoiding 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a storage allocation. Use of this function is NOT RECOMMENDED. 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Performance gains are minimal, and correct buffer management is 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * tricky. Use clone() instead. 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param stackBuffer The pointer to the memory into which the cloned object 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * should be placed. If NULL, allocate heap memory 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for the cloned object. 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param BufferSize The size of the buffer. If zero, return the required 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * buffer size, but do not clone the object. If the 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * size was too small (but not zero), allocate heap 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * storage for the cloned object. 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Error status. 
U_SAFECLONE_ALLOCATED_WARNING will be 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returned if the provided buffer was too small, and 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the clone was therefore put on the heap. 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return Pointer to the clone object. This may differ from the stackBuffer 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * address if the byte alignment of the stack buffer was not suitable 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or if the stackBuffer was too small to hold the clone. 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual BreakIterator * createBufferClone(void *stackBuffer, 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t &BufferSize, 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode &status); 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the binary form of compiled break rules, 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * which can then be used to create a new break iterator at some 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * time in the future. Creating a break iterator from pre-compiled rules 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is much faster than building one from the source form of the 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * break rules. 
623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The binary data can only be used with the same version of ICU 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and on the same platform type (processor endian-ness) 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param length Returns the length of the binary data. (Out parameter.) 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return A pointer to the binary (compiled) rule data. The storage 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * belongs to the RuleBasedBreakIterator object, not the 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * caller, and must not be modified or deleted. 632b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @draft ICU 4.8 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual const uint8_t *getBinaryRules(uint32_t &length); 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected: 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // implementation 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Dumps caches and performs other actions associated with a complete change 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in 
text or iteration position. 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void reset(void); 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return true if the category lookup for this char 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indicates that it is in the set of dictionary lookup chars. 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This function is intended for use by dictionary based break iterators. 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return true if the category lookup for this char 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indicates that it is in the set of dictionary lookup chars. 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UBool isDictionaryChar(UChar32); 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the type of the break iterator. 
661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t getBreakType() const; 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set the type of the break iterator. 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void setBreakType(int32_t type); 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Common initialization function, used by constructors and bufferClone. 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (Also used by DictionaryBasedBreakIterator::createBufferClone().) 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void init(); 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This method backs the iterator back up to a "safe position" in the text. 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is a position that we know, without any context, must be a break position. 
684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The various calling methods then iterate forward from this safe position to 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the appropriate position to return. (For more information, see the description 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.) 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param statetable state table used for moving backwards 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t handlePrevious(const RBBIStateTable *statetable); 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This method is the actual implementation of the next() method. All iteration 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * vectors through here. This method initializes the state machine to state 1 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and advances through the text character by character until we reach the end 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of the text or the state machine transitions to state 0. We update our return 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * value every time the state machine passes through a possible end state. 
698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param statetable state table used of moving forwards 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t handleNext(const RBBIStateTable *statetable); 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected: 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is the function that actually implements dictionary-based 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * breaking. Covering at least the range from startPos to endPos, 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it checks for dictionary characters, and if it finds them determines 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the appropriate object to deal with them. It may cache found breaks in 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * fCachedBreakPositions as it goes. It may well also look at text outside 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the range startPos to endPos. 
712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If going forward, endPos is the normal Unicode break result, and 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if goind in reverse, startPos is the normal Unicode break result 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param startPos The start position of a range of text 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param endPos The end position of a range of text 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param reverse The call is for the reverse direction 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse); 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This function returns the appropriate LanguageBreakEngine for a 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * given character c. 
726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param c A character in the dictionary set 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void makeRuleStatusValid(); 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------------------------ 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Inline Functions Definitions ... 
741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------------------------ 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// operator!= is defined as the logical negation of this object's operator==. 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruinline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return !this->operator==(that); 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 753