1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*************************************************************************** 38393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius* Copyright (C) 1999-2013 International Business Machines Corporation * 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and others. All rights reserved. * 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*************************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 10/22/99 alan Creation. 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11/11/99 rgillam Complete port from Java. 
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef RBBI_H 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define RBBI_H 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \file 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \brief C++ API: Rule Based Break Iterator 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/brkiter.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/udata.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/parseerr.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/schriter.h" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uchriter.h" 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct UTrie; 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** @internal */ 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct RBBIDataHeader; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass RuleBasedBreakIteratorTables; 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass BreakIterator; 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass RBBIDataWrapper; 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UStack; 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass LanguageBreakEngine; 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnhandledEngine; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct RBBIStateTable; 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A subclass of BreakIterator whose behavior is specified using a list of rules. 
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p>Instances of this class are most commonly created by the factory methods of 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc., 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and then used via the abstract API in class BreakIterator</p> 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p>See the ICU User Guide for information on Break Iterator Rules.</p> 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p>This class is not intended to be subclassed. (Class DictionaryBasedBreakIterator 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is a subclass, but that relationship is effectively internal to the ICU 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * implementation. 
The subclassing interface to RuleBasedBreakIterator is 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not part of the ICU API, and may not remain stable.)</p> 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass U_COMMON_API RuleBasedBreakIterator : public BreakIterator { 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected: 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The UText through which this BreakIterator accesses the text 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UText *fText; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A character iterator that refers to the same text as the UText, above. 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Only included for compatibility with old API, which was based on CharacterIterators. 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. 
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CharacterIterator *fCharIter; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the input text is provided by a UnicodeString, this will point to 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a CharacterIterator that wraps that data. Needed only for the 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * implementation of getText(), a backwards compatibility issue. 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru StringCharacterIterator *fSCharIter; 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the input text is provided by a UText, this 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * dummy CharacterIterator over an empty string will 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be returned from getText() 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCharCharacterIterator *fDCharIter; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The rule data for this BreakIterator instance 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDataWrapper *fData; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** Index of the Rule {tag} values for the most recent match. 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fLastRuleStatusIndex; 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Rule tag value valid flag. 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Some iterator operations don't intrinsically set the correct tag value. 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This flag lets us lazily compute the value if we are ever asked for it. 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool fLastStatusIndexValid; 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Counter for the number of characters encountered with the "dictionary" 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * flag set. 
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t fDictionaryCharCount; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When a range of characters is divided up using the dictionary, the break 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * positions that are discovered are stored here, preventing us from having 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to use either the dictionary or the state table again until the iterator 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * leaves this range of text. Has the most impact for line breaking. 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t* fCachedBreakPositions; 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The number of elements in fCachedBreakPositions 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fNumCachedBreakPositions; 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if fCachedBreakPositions is not null, this indicates which item in the 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * cache the current iteration position refers to 
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fPositionInCache; 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If present, UStack of LanguageBreakEngine objects that might handle 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * dictionary characters. Searched from top to bottom to find an object to 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * handle a given character. 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UStack *fLanguageBreakEngines; 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If present, the special LanguageBreakEngine used for handling 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * characters that are in the dictionary set, but not handled by any 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * LanguageBreakEngine. 
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnhandledEngine *fUnhandledBreakEngine; 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The type of the break iterator, or -1 if it has not been set. 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fBreakType; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected: 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // constructors 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 173103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#ifndef U_HIDE_INTERNAL_API 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 17585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Constant to be used in the constructor 17685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * RuleBasedBreakIterator(RBBIDataHeader*, EDontAdopt, UErrorCode &); 17785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * which does not adopt the memory indicated by the RBBIDataHeader* 17885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * parameter. 
17985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 18085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @internal 18185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 18285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho enum EDontAdopt { 18385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho kDontAdopt 18485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho }; 18585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 18685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /** 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructor from a flattened set of RBBI data in malloced memory. 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * RuleBasedBreakIterators built from a custom set of rules 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are created via this constructor; the rules are compiled 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * into memory, then the break iterator is constructed here. 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The break iterator adopts the memory, and will 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * free it when done. 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /** 19985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Constructor from a flattened set of RBBI data in memory which need not 20085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * be malloced (e.g. it may be a memory-mapped file, etc.). 
20185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 20285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * This version does not adopt the memory, and does not 20385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * free it when done. 20485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @internal 20585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 20685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status); 207103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif /* U_HIDE_INTERNAL_API */ 20885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru friend class RBBIRuleBuilder; 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** @internal */ 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru friend class BreakIterator; 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** Default constructor. Creates an empty shell of an iterator, with no 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * rules or text to iterate over. Object can subsequently be assigned to. 
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.2 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator(); 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. Will produce a break iterator with the same behavior, 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and which iterates over the same text, as the one passed in. 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param that The RuleBasedBreakIterator passed to be copied 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator(const RuleBasedBreakIterator& that); 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Construct a RuleBasedBreakIterator from a set of rules supplied as a string. 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param rules The break rules to be used. 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param parseError In the event of a syntax error in the rules, provides the location 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * within the rules of the problem. 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Information on any errors encountered. 
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.2 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator( const UnicodeString &rules, 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError &parseError, 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode &status); 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Construct a RuleBasedBreakIterator from a set of precompiled binary rules. 246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules(). 247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Construction of a break iterator in this way is substantially faster than 248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * construction from source rules. 249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Ownership of the storage containing the compiled rules remains with the 251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * caller of this function. The compiled rules must not be modified or 252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * deleted during the life of the break iterator. 253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The compiled rules are not compatible across different major versions of ICU. 255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The compiled rules are compatible only between machines with the same 256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * byte ordering (little or big endian) and the same base character set family 257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * (ASCII or EBCDIC). 
258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @see #getBinaryRules 260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param compiledRules A pointer to the compiled break rules to be used. 261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param ruleLength The length of the compiled break rules, in bytes. This 262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * corresponds to the length value produced by getBinaryRules(). 263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param status Information on any errors encountered, including invalid 264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * binary rules. 265103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho RuleBasedBreakIterator(const uint8_t *compiledRules, 268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uint32_t ruleLength, 269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UErrorCode &status); 270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This constructor uses the udata interface to create a BreakIterator 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * whose internal tables live in a memory-mapped file. "image" is an 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ICU UDataMemory handle for the pre-compiled break iterator tables. 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param image handle to the memory image for the break iterator data. 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Ownership of the UDataMemory handle passes to the Break Iterator, 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * which will be responsible for closing it when it is no longer needed. 
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Information on any errors encountered. 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @see udata_open 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @see #getBinaryRules 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.8 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual ~RuleBasedBreakIterator(); 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Assignment operator. Sets this iterator to have the same behavior, 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and iterate over the same text, as the one passed in. 
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param that The RuleBasedBreakIterator passed in 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the newly created RuleBasedBreakIterator 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Equality operator. Returns TRUE if both BreakIterators are of the 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * same class, have the same behavior, and iterate over the same text. 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param that The BreakIterator to be compared for equality 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return TRUE if both BreakIterators are of the 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * same class, have the same behavior, and iterate over the same text. 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UBool operator==(const BreakIterator& that) const; 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Not-equal operator. If operator== returns TRUE, this returns FALSE, 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and vice versa. 
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param that The BreakIterator to be compared for inequality 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return TRUE if both BreakIterators are not the same. 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool operator!=(const BreakIterator& that) const; 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns a newly-constructed RuleBasedBreakIterator with the same 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * behavior, and iterating over the same text, as this one. 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Differs from the copy constructor in that it is polymorphic, and 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * will correctly clone (copy) a derived class. 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * clone() is thread safe. Multiple threads may simultaneously 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * clone the same source break iterator. 
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return a newly-constructed RuleBasedBreakIterator 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual BreakIterator* clone() const; 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Compute a hash code for this BreakIterator 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return A hash code 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t hashCode(void) const; 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns the description used to create this iterator 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the description used to create this iterator 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual const UnicodeString& getRules(void) const; 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // BreakIterator overrides 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return a CharacterIterator over the text being analyzed. 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The returned character iterator is owned by the break iterator, and must 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not be deleted by the caller. Repeated calls to this function may 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return the same CharacterIterator. 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * </p> 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The returned character iterator must not be used concurrently with 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the break iterator. If concurrent operation is needed, clone the 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returned character iterator first and operate on the clone. 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * </p> 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the break iterator is operating on text supplied via a UText, 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this function will fail. Lacking any way to signal failures, it 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returns a CharacterIterator containing no text. 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The function getUText() provides similar functionality, 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is reliable, and is more efficient. 
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * </p> 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TODO: deprecate this function? 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return An iterator over the text being analyzed. 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual CharacterIterator& getText(void) const; 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get a UText for the text being analyzed. 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The returned UText is a shallow clone of the UText used internally 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * by the break iterator implementation. It can safely be used to 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * access the text without impacting any break iterator operations, 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but the underlying text itself must not be altered. 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param fillIn A UText to be filled in. If NULL, a new UText will be 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * allocated to hold the result. 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status receives any error codes. 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The current UText for this break iterator. 
If an input 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UText was provided, it will always be returned. 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 3.4 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UText *getUText(UText *fillIn, UErrorCode &status) const; 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set the iterator to analyze a new piece of text. This function resets 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the current iteration position to the beginning of the text. 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param newText An iterator over the text to analyze. The BreakIterator 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * takes ownership of the character iterator. The caller MUST NOT delete it! 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void adoptText(CharacterIterator* newText); 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set the iterator to analyze a new piece of text. This function resets 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the current iteration position to the beginning of the text. 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param newText The text to analyze. 
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void setText(const UnicodeString& newText); 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Reset the break iterator to operate over the text represented by 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the UText. The iterator position is reset to the start. 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This function makes a shallow clone of the supplied UText. This means 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that the caller is free to immediately close or otherwise reuse the 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UText that was passed as a parameter, but that the underlying text itself 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * must not be altered while being referenced by the break iterator. 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param text The UText used to change the text. 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Receives any error codes. 
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 3.4 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void setText(UText *text, UErrorCode &status); 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the current iteration position to the beginning of the text. 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The offset of the beginning of the text. 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t first(void); 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the current iteration position to the end of the text. 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The text's past-the-end offset. 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t last(void); 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Advances the iterator either forward or backward the specified number of steps. 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Negative values move backward, and positive values move forward. This is 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * equivalent to repeatedly calling next() or previous(). 
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param n The number of steps to move. The sign indicates the direction 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (negative is backwards, and positive is forwards). 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The character offset of the boundary position n boundaries away from 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the current one. 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t next(int32_t n); 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Advances the iterator to the next boundary position. 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The position of the first boundary after this one. 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t next(void); 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Moves the iterator backwards, to the last boundary preceding this one. 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The position of the last boundary position preceding this one. 
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t previous(void); 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the iterator to refer to the first boundary position following 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the specified position. 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param offset The position from which to begin searching for a break position. 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The position of the first break after the current position. 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t following(int32_t offset); 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the iterator to refer to the last boundary position before the 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * specified position. 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param offset The position to begin searching for a break from. 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The position of the last boundary before the starting position. 
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t preceding(int32_t offset); 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns true if the specified position is a boundary position. As a side 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * effect, leaves the iterator pointing to the first boundary position at 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or after "offset". 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param offset the offset to check. 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return True if "offset" is a boundary position. 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UBool isBoundary(int32_t offset); 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns the current iteration position. 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The current iteration position. 
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t current(void) const; 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the status tag from the break rule that determined the most recently 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returned break position. For break rules that do not specify a 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * status, a default value of 0 is returned. If more than one break rule 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * would cause a boundary to be located at some position in the text, 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the numerically largest of the applicable status values is returned. 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Of the standard types of ICU break iterators, only word break and 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * line break provide status values. The values are defined in 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the header file ubrk.h. For Word breaks, the status allows distinguishing between words 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that contain alphabetic letters, "words" that appear to be numbers, 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * punctuation and spaces, words containing ideographic characters, and 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * more. 
For Line Break, the status distinguishes between hard (mandatory) breaks 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and soft (potential) break positions. 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <code>getRuleStatus()</code> can be called after obtaining a boundary 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * position from <code>next()</code>, <code>previous()</code>, or 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * any other break iterator function that returns a boundary position. 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When creating custom break rules, one is free to define whatever 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * status values may be convenient for the application. 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note: this function is not thread safe. It should not have been 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * declared const, and the const remains only for compatibility 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reasons. (The function is logically const, but not bit-wise const). 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the status from the break rule that determined the most recently 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returned break position. 
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @see UWordBreak 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.2 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t getRuleStatus() const; 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the status (tag) values from the break rule(s) that determined the most 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * recently returned break position. 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <p> 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The returned status value(s) are stored into an array provided by the caller. 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The values are stored in sorted (ascending) order. 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If the capacity of the output array is insufficient to hold the data, 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the output will be truncated to the available length, and a 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * U_BUFFER_OVERFLOW_ERROR will be signaled. 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param fillInVec an array to be filled in with the status values. 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param capacity the length of the supplied vector. 
A length of zero causes 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the function to return the number of status values, in the 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * normal way, without attempting to store any values. 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status receives error codes. 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The number of rule status values from rules that determined 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the most recent boundary returned by the break iterator. 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is the total number of status values that were available, 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not the reduced number that were actually returned. 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @see getRuleStatus 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 3.0 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This method is to implement a simple version of RTTI, since not all 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * C++ compilers support genuine RTTI. Polymorphic operator==() and 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * clone() methods call this method. 
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The class ID for this object. All objects of a 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * given class have the same class ID. Objects of 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * other classes have different class IDs. 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UClassID getDynamicClassID(void) const; 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns the class ID for this class. This is useful only for 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * comparing to a return value from getDynamicClassID(). For example: 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Base* polymorphic_pointer = createPolymorphicObject(); 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if (polymorphic_pointer->getDynamicClassID() == 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Derived::getStaticClassID()) ... 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return The class ID for all objects of this class. 
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static UClassID U_EXPORT2 getStaticClassID(void); 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 58659d709d503bab6e2b61931737e662dd293b40578ccornelius /** 58759d709d503bab6e2b61931737e662dd293b40578ccornelius * Deprecated functionality. Use clone() instead. 58859d709d503bab6e2b61931737e662dd293b40578ccornelius * 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Create a clone (copy) of this break iterator in memory provided 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * by the caller. The idea is to increase performance by avoiding 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a storage allocation. Use of this function is NOT RECOMMENDED. 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Performance gains are minimal, and correct buffer management is 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * tricky. Use clone() instead. 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param stackBuffer The pointer to the memory into which the cloned object 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * should be placed. If NULL, allocate heap memory 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for the cloned object. 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param BufferSize The size of the buffer. If zero, return the required 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * buffer size, but do not clone the object. If the 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * size was too small (but not zero), allocate heap 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * storage for the cloned object. 
602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returned if the provided buffer was too small, and 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the clone was therefore put on the heap. 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return Pointer to the clone object. This may differ from the stackBuffer 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * address if the byte alignment of the stack buffer was not suitable 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or if the stackBuffer was too small to hold the clone. 61059d709d503bab6e2b61931737e662dd293b40578ccornelius * @deprecated ICU 52. Use clone() instead. 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual BreakIterator * createBufferClone(void *stackBuffer, 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t &BufferSize, 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode &status); 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the binary form of compiled break rules, 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * which can then be used to create a new break iterator at some 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * time in the future. 
Creating a break iterator from pre-compiled rules 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is much faster than building one from the source form of the 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * break rules. 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The binary data can only be used with the same version of ICU 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and on the same platform type (processor endian-ness) 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param length Returns the length of the binary data. (Out parameter.) 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return A pointer to the binary (compiled) rule data. The storage 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * belongs to the RuleBasedBreakIterator object, not the 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * caller, and must not be modified or deleted. 632103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual const uint8_t *getBinaryRules(uint32_t &length); 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 6364fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes /** 6374fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * Set the subject text string upon which the break iterator is operating 6384fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * without changing any other aspect of the matching state. 6394fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * The new and previous text strings must have the same content. 
6404fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * 6414fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * This function is intended for use in environments where ICU is operating on 6424fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * strings that may move around in memory. It provides a mechanism for notifying 6434fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * ICU that the string has been relocated, and providing a new UText to access the 6444fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * string in its new position. 6454fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * 6464fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * Note that the break iterator implementation never copies the underlying text 6474fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * of a string being processed, but always operates directly on the original text 6484fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * provided by the user. Refreshing simply drops the references to the old text 6494fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * and replaces them with references to the new. 6504fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * 6514fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * Caution: this function is normally used only by very specialized, 6524fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * system-level code. One example use case is with garbage collection that moves 6534fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * the text in memory. 6544fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * 6554fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * @param input The new (moved) text string. 6564fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * @param status Receives errors detected by this function. 
6574fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * @return *this 6584fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes * 6598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49 6604fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes */ 6614fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status); 6624fceb0aeb072e9c6879c37fbcdcef2c4286c4719Elliott Hughes 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected: 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // implementation 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //======================================================================= 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Dumps caches and performs other actions associated with a complete change 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in text or iteration position. 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void reset(void); 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return true if the category lookup for this char 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indicates that it is in the set of dictionary lookup chars. 
679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This function is intended for use by dictionary based break iterators. 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return true if the category lookup for this char 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indicates that it is in the set of dictionary lookup chars. 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UBool isDictionaryChar(UChar32); 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the type of the break iterator. 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual int32_t getBreakType() const; 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set the type of the break iterator. 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual void setBreakType(int32_t type); 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 699103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#ifndef U_HIDE_INTERNAL_API 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Common initialization function, used by constructors and bufferClone. 
702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void init(); 705103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif /* U_HIDE_INTERNAL_API */ 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This method backs the iterator back up to a "safe position" in the text. 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is a position that we know, without any context, must be a break position. 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The various calling methods then iterate forward from this safe position to 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the appropriate position to return. (For more information, see the description 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.) 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param statetable state table used for moving backwards 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t handlePrevious(const RBBIStateTable *statetable); 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This method is the actual implementation of the next() method. All iteration 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * vectors through here.
This method initializes the state machine to state 1 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and advances through the text character by character until we reach the end 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of the text or the state machine transitions to state 0. We update our return 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * value every time the state machine passes through a possible end state. 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param statetable state table used for moving forwards 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t handleNext(const RBBIStateTable *statetable); 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprotected: 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 733103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#ifndef U_HIDE_INTERNAL_API 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is the function that actually implements dictionary-based 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * breaking. Covering at least the range from startPos to endPos, 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it checks for dictionary characters, and if it finds them determines 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the appropriate object to deal with them. It may cache found breaks in 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * fCachedBreakPositions as it goes. It may well also look at text outside 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the range startPos to endPos.
741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If going forward, endPos is the normal Unicode break result, and 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if going in reverse, startPos is the normal Unicode break result. 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param startPos The start position of a range of text 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param endPos The end position of a range of text 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param reverse The call is for the reverse direction 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse); 749103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif /* U_HIDE_INTERNAL_API */ 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This function returns the appropriate LanguageBreakEngine for a 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * given character c.
756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param c A character in the dictionary set 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** NOTE(review): this header does not document the method's purpose; presumably it brings the cached rule status back in sync with the current position -- confirm against the implementation. 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void makeRuleStatusValid(); 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------------------------ 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Inline Function Definitions ...
771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------------------------ 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** Inequality operator: returns the logical negation of operator==(that). @param that The BreakIterator to compare this one against. @return The negated result of operator==(that). */ 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruinline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return !operator==(that); 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* RBBI_H */ 783