1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (C) 2003, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: unorm_it.h 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2003jan21 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef __UNORM_IT_H__ 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define __UNORM_IT_H__ 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uiter.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unorm.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Normalizing UCharIterator wrapper. 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This internal API basically duplicates the functionality of the C++ Normalizer 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - it actually implements a character iterator (UCharIterator) 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * with few restrictions (see unorm_setIter()) 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - it supports UCharIterator getState()/setState() 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - it uses lower-level APIs and buffers more text and states, 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * hopefully resulting in higher performance 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Usage example: 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \code 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * function(UCharIterator *srcIter) { 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UNormIterator *uni; 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UCharIterator *iter; 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UErrorCode errorCode; 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * errorCode=U_ZERO_ERROR; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * uni=unorm_openIter(&errorCode); 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if(U_FAILURE(errorCode)) { 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * // report error 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return; 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * } 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode); 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if(U_FAILURE(errorCode)) { 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * // report error 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * } else { 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * // use iter to iterate over the canonically ordered 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * // version of srcIter's text 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * uint32_t state; 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ... 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * state=uiter_getState(iter); 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if(state!=UITER_NO_STATE) { 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * // use valid state, store it, use iter some more 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ... 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * // later restore iter to the saved state: 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * uiter_setState(iter, state, &errorCode); 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ... 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * } 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ... 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * } 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * unorm_closeIter(uni); 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * } 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \endcode 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * See also the ICU test suites. 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct UNormIterator; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct UNormIterator UNormIterator; 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Size of a stack buffer to hold a UNormIterator, see the stackMem parameter 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of unorm_openIter(). 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UNORM_ITER_SIZE 1024 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Open a normalizing iterator. Must be closed later. 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Use unorm_setIter(). 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param stackMem Pointer to preallocated (stack-allocated) buffer to hold 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the UNormIterator if possible; can be NULL. 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param stackMemSize Number of bytes at stackMem; can be 0, 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or should be >= UNORM_ITER_SIZE for a non-NULL stackMem. 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pErrorCode ICU error code 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return an allocated and pre-initialized UNormIterator 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UNormIterator * U_EXPORT2 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruunorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode); 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Close a normalizing iterator. 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param uni UNormIterator from unorm_openIter() 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruunorm_closeIter(UNormIterator *uni); 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set a UCharIterator and a normalization mode for the normalizing iterator 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to wrap. The normalizing iterator will read from the character iterator, 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * normalize the text, and in turn deliver it with its own wrapper UCharIterator 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * interface which it returns. 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The source iterator remains at its current position through the unorm_setIter() 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * call but will be used and moved as soon as the 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the returned normalizing iterator is. 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The returned interface pointer is valid for as long as the normalizing iterator 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is open and until another unorm_setIter() call is made on it. 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The normalizing iterator's UCharIterator interface has the following properties: 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - getState() will return UITER_NO_STATE for unknown states for positions 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that are not at normalization boundaries 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param uni UNormIterator from unorm_openIter() 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param iter The source text UCharIterator to be wrapped. It is aliases into the normalizing iterator. 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Must support getState() and setState(). 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param mode The normalization mode. 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pErrorCode ICU error code 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return an alias to the normalizing iterator's UCharIterator interface 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UCharIterator * U_EXPORT2 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruunorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode); 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* uconfig.h switches */ 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 149