1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)****************************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (C) 1996-2010, International Business Machines Corporation and others. 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* All Rights Reserved. 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)****************************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifndef UBRK_H 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UBRK_H 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uloc.h" 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utext.h" 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/localpointer.h" 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * A text-break iterator. 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * For usage in C programs. 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)# define UBRK_TYPEDEF_UBREAK_ITERATOR 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Opaque type representing an ICU Break iterator object. 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) typedef struct UBreakIterator UBreakIterator; 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_BREAK_ITERATION 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/parseerr.h" 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * \file 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * \brief C API: BreakIterator 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <h2> BreakIterator C API </h2> 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The BreakIterator C API defines methods for finding the location 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of boundaries in text. Pointer to a UBreakIterator maintain a 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * current position and scan over text returning the index of characters 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * where boundaries occur. 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p> 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Line boundary analysis determines where a text string can be broken 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * when line-wrapping. The mechanism correctly handles punctuation and 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * hyphenated words. 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p> 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sentence boundary analysis allows selection with correct 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * interpretation of periods within numbers and abbreviations, and 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * trailing punctuation marks such as quotation marks and parentheses. 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p> 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Word boundary analysis is used by search and replace functions, as 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * well as within text editing applications that allow the user to 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * select words with a double click. Word selection provides correct 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * interpretation of punctuation marks within and following 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * words. Characters that are not part of a word, such as symbols or 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * punctuation marks, have word-breaks on both sides. 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p> 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Character boundary analysis identifies the boundaries of 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * "Extended Grapheme Clusters", which are groupings of codepoints 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * that should be treated as character-like units for many text operations. 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Please see Unicode Standard Annex #29, Unicode Text Segmentation, 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * http://www.unicode.org/reports/tr29/ for additional information 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * on grapheme clusters and guidelines on their use. 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p> 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Title boundary analysis locates all positions, 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * typically starts of words, that should be set to Title Case 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * when title casing the text. 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p> 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The text boundary positions are found according to the rules 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * described in Unicode Standard Annex #29, Text Boundaries, and 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Unicode Standard Annex #14, Line Breaking Properties. These 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * are available at http://www.unicode.org/reports/tr14/ and 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * http://www.unicode.org/reports/tr29/. 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p> 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * In addition to the plain C API defined in this header file, an 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * object oriented C++ API with equivalent functionality is defined in the 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * file brkiter.h. 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p> 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Code snippets illustrating the use of the Break Iterator APIs 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * are available in the ICU User Guide, 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * http://icu-project.org/userguide/boundaryAnalysis.html 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and in the sample program icu/source/samples/break/break.cpp 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** The possible types of text boundaries. @stable ICU 2.0 */ 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef enum UBreakIteratorType { 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Character breaks @stable ICU 2.0 */ 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_CHARACTER = 0, 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Word breaks @stable ICU 2.0 */ 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_WORD = 1, 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Line breaks @stable ICU 2.0 */ 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_LINE = 2, 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Sentence breaks @stable ICU 2.0 */ 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_SENTENCE = 3, 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifndef U_HIDE_DEPRECATED_API 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Title Case breaks 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The iterator created using this type locates title boundaries as described for 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * please use Word Boundary iterator. 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later. 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_TITLE = 4, 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* U_HIDE_DEPRECATED_API */ 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_COUNT = 5 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} UBreakIteratorType; 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** Value indicating all text boundaries have been returned. 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UBRK_DONE ((int32_t) -1) 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Enum constants for the word break tags returned by 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * getRuleStatus(). A range of values is defined for each category of 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * word, to allow for further subdivisions of a category in future releases. 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Applications should check for tag values falling within the range, rather 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * than for single individual values. 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.2 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef enum UWordBreak { 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for "words" that do not fit into any of other categories. 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Includes spaces and most punctuation. */ 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_WORD_NONE = 0, 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Upper bound for tags for uncategorized words. */ 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_WORD_NONE_LIMIT = 100, 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for words that appear to be numbers, lower limit. */ 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_WORD_NUMBER = 100, 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for words that appear to be numbers, upper limit. */ 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_WORD_NUMBER_LIMIT = 200, 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for words that contain letters, excluding 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * hiragana, katakana or ideographic characters, lower limit. */ 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_WORD_LETTER = 200, 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for words containing letters, upper limit */ 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_WORD_LETTER_LIMIT = 300, 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for words containing kana characters, lower limit */ 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_WORD_KANA = 300, 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for words containing kana characters, upper limit */ 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_WORD_KANA_LIMIT = 400, 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for words containing ideographic characters, lower limit */ 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_WORD_IDEO = 400, 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for words containing ideographic characters, upper limit */ 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_WORD_IDEO_LIMIT = 500 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} UWordBreak; 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Enum constants for the line break tags returned by getRuleStatus(). 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * A range of values is defined for each category of 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * word, to allow for further subdivisions of a category in future releases. 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Applications should check for tag values falling within the range, rather 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * than for single individual values. 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.8 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef enum ULineBreakTag { 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for soft line breaks, positions at which a line break 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * is acceptable but not required */ 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_LINE_SOFT = 0, 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Upper bound for soft line breaks. */ 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_LINE_SOFT_LIMIT = 100, 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for a hard, or mandatory line break */ 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_LINE_HARD = 100, 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Upper bound for hard line breaks. */ 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_LINE_HARD_LIMIT = 200 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} ULineBreakTag; 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Enum constants for the sentence break tags returned by getRuleStatus(). 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * A range of values is defined for each category of 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * sentence, to allow for further subdivisions of a category in future releases. 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Applications should check for tag values falling within the range, rather 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * than for single individual values. 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.8 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef enum USentenceBreakTag { 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for for sentences ending with a sentence terminator 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * ('.', '?', '!', etc.) character, possibly followed by a 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * hard separator (CR, LF, PS, etc.) 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_SENTENCE_TERM = 0, 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Upper bound for tags for sentences ended by sentence terminators. */ 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_SENTENCE_TERM_LIMIT = 100, 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for for sentences that do not contain an ending 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * sentence terminator ('.', '?', '!', etc.) character, but 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * are ended only by a hard separator (CR, LF, PS, etc.) or end of input. 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_SENTENCE_SEP = 100, 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Upper bound for tags for sentences ended by a separator. */ 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBRK_SENTENCE_SEP_LIMIT = 200 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** Tag value for a hard, or mandatory line break */ 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} USentenceBreakTag; 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Open a new UBreakIterator for locating text boundaries for a specified locale. 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * A UBreakIterator may be used for detecting character, line, word, 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and sentence breaks in text. 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * UBRK_LINE, UBRK_SENTENCE 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param locale The locale specifying the text-breaking conventions. 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param text The text to be iterated over. 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param textLength The number of characters in text, or -1 if null-terminated. 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param status A UErrorCode to receive any errors. 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return A UBreakIterator for the specified locale. 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see ubrk_openRules 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE UBreakIterator* U_EXPORT2 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_open(UBreakIteratorType type, 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *locale, 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *text, 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t textLength, 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status); 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Open a new UBreakIterator for locating text boundaries using specified breaking rules. 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The rule syntax is ... (TBD) 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param rules A set of rules specifying the text breaking conventions. 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param rulesLength The number of characters in rules, or -1 if null-terminated. 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param text The text to be iterated over. May be null, in which case ubrk_setText() is 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * used to specify the text to be iterated. 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param textLength The number of characters in text, or -1 if null-terminated. 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param parseErr Receives position and context information for any syntax errors 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * detected while parsing the rules. 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param status A UErrorCode to receive any errors. 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return A UBreakIterator for the specified rules. 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see ubrk_open 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.2 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE UBreakIterator* U_EXPORT2 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_openRules(const UChar *rules, 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t rulesLength, 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *text, 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t textLength, 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError *parseErr, 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status); 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Thread safe cloning operation 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi iterator to be cloned 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * If buffer is not large enough, new memory will be allocated. 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations. 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pBufferSize pointer to size of allocated space. 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * If *pBufferSize == 0, a sufficient size for use in cloning will 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * be returned ('pre-flighting') 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * If *pBufferSize is not enough for a stack-based safe clone, 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * new memory will be allocated. 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param status to indicate whether the operation went on smoothly or there were errors 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary. 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return pointer to the new clone 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE UBreakIterator * U_EXPORT2 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_safeClone( 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UBreakIterator *bi, 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void *stackBuffer, 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *pBufferSize, 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status); 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * A recommended size (in bytes) for the memory buffer to be passed to ubrk_saveClone(). 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define U_BRK_SAFECLONE_BUFFERSIZE 512 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Close a UBreakIterator. 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Once closed, a UBreakIterator may no longer be used. 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param bi The break iterator to close. 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE void U_EXPORT2 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_close(UBreakIterator *bi); 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if U_SHOW_CPLUSPLUS_API 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * \class LocalUBreakIteratorPointer 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * "Smart pointer" class, closes a UBreakIterator via ubrk_close(). 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * For most methods see the LocalPointerBase base class. 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see LocalPointerBase 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see LocalPointer 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 4.4 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close); 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sets an existing iterator to point to a new piece of text 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi The iterator to use 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param text The text to be set 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param textLength The length of the text 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param status The error code 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE void U_EXPORT2 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_setText(UBreakIterator* bi, 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar* text, 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t textLength, 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode* status); 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sets an existing iterator to point to a new piece of text 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi The iterator to use 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param text The text to be set. 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This function makes a shallow clone of the supplied UText. This means 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * that the caller is free to immediately close or otherwise reuse the 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * UText that was passed as a parameter, but that the underlying text itself 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * must not be altered while being referenced by the break iterator. 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param status The error code 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 3.4 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE void U_EXPORT2 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_setUText(UBreakIterator* bi, 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UText* text, 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode* status); 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Determine the most recently-returned text boundary. 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi The break iterator to use. 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous, 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * \ref ubrk_first, or \ref ubrk_last. 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE int32_t U_EXPORT2 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_current(const UBreakIterator *bi); 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Determine the text boundary following the current text boundary. 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi The break iterator to use. 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The character index of the next text boundary, or UBRK_DONE 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * if all text boundaries have been returned. 350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see ubrk_previous 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE int32_t U_EXPORT2 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_next(UBreakIterator *bi); 355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Determine the text boundary preceding the current text boundary. 358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi The break iterator to use. 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The character index of the preceding text boundary, or UBRK_DONE 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * if all text boundaries have been returned. 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see ubrk_next 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE int32_t U_EXPORT2 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_previous(UBreakIterator *bi); 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Determine the index of the first character in the text being scanned. 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This is not always the same as index 0 of the text. 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi The break iterator to use. 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The character index of the first character in the text being scanned. 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see ubrk_last 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE int32_t U_EXPORT2 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_first(UBreakIterator *bi); 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Determine the index immediately <EM>beyond</EM> the last character in the text being 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * scanned. 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This is not the same as the last character. 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi The break iterator to use. 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The character offset immediately <EM>beyond</EM> the last character in the 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * text being scanned. 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see ubrk_first 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE int32_t U_EXPORT2 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_last(UBreakIterator *bi); 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Determine the text boundary preceding the specified offset. 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The value returned is always smaller than offset, or UBRK_DONE. 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi The break iterator to use. 396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param offset The offset to begin scanning. 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The text boundary preceding offset, or UBRK_DONE. 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see ubrk_following 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE int32_t U_EXPORT2 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_preceding(UBreakIterator *bi, 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t offset); 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Determine the text boundary following the specified offset. 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The value returned is always greater than offset, or UBRK_DONE. 408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi The break iterator to use. 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param offset The offset to begin scanning. 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The text boundary following offset, or UBRK_DONE. 411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see ubrk_preceding 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.0 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE int32_t U_EXPORT2 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_following(UBreakIterator *bi, 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t offset); 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Get a locale for which text breaking information is available. 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* A UBreakIterator in a locale returned by this function will perform the correct 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* text breaking for the locale. 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param index The index of the desired locale. 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @return A locale for which number text breaking information is available, or 0 if none. 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @see ubrk_countAvailable 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @stable ICU 2.0 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE const char* U_EXPORT2 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_getAvailable(int32_t index); 429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Determine how many locales have text breaking information available. 432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* This function is most useful as determining the loop ending condition for 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* calls to \ref ubrk_getAvailable. 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @return The number of locales for which text breaking information is available. 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @see ubrk_getAvailable 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @stable ICU 2.0 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE int32_t U_EXPORT2 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_countAvailable(void); 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Returns true if the specfied position is a boundary position. As a side 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* effect, leaves the iterator pointing to the first boundary position at 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* or after "offset". 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param bi The break iterator to use. 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param offset the offset to check. 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @return True if "offset" is a boundary position. 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @stable ICU 2.0 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE UBool U_EXPORT2 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_isBoundary(UBreakIterator *bi, int32_t offset); 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the status from the break rule that determined the most recently 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * returned break position. The values appear in the rule source 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * within brackets, {123}, for example. For rules that do not specify a 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * status, a default value of 0 is returned. 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p> 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * For word break iterators, the possible values are defined in enum UWordBreak. 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.2 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE int32_t U_EXPORT2 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_getRuleStatus(UBreakIterator *bi); 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the statuses from the break rules that determined the most recently 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * returned break position. The values appear in the rule source 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * within brackets, {123}, for example. The default status value for rules 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * that do not explicitly provide one is zero. 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p> 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * For word break iterators, the possible values are defined in enum UWordBreak. 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi The break iterator to use 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param fillInVec an array to be filled in with the status values. 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param capacity the length of the supplied vector. A length of zero causes 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the function to return the number of status values, in the 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * normal way, without attemtping to store any values. 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param status receives error codes. 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return The number of rule status values from rules that determined 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the most recent boundary returned by the break iterator. 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 3.0 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE int32_t U_EXPORT2 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status); 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the locale of the break iterator. You can choose between the valid and 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the actual locale. 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param bi break iterator 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param type locale type (valid or actual) 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param status error code 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return locale string 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @stable ICU 2.8 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_STABLE const char* U_EXPORT2 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status); 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 502