1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Copyright (C) 2003, International Business Machines
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   file name:  unorm_it.h
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   encoding:   US-ASCII
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   tab size:   8 (not used)
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   indentation:4
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created on: 2003jan21
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created by: Markus W. Scherer
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef __UNORM_IT_H__
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define __UNORM_IT_H__
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uiter.h"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unorm.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Normalizing UCharIterator wrapper.
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This internal API basically duplicates the functionality of the C++ Normalizer
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - it actually implements a character iterator (UCharIterator)
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   with few restrictions (see unorm_setIter())
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - it supports UCharIterator getState()/setState()
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - it uses lower-level APIs and buffers more text and states,
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *   hopefully resulting in higher performance
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Usage example:
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \code
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * function(UCharIterator *srcIter) {
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     UNormIterator *uni;
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     UCharIterator *iter;
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     UErrorCode errorCode;
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     errorCode=U_ZERO_ERROR;
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     uni=unorm_openIter(NULL, 0, &errorCode);
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     if(U_FAILURE(errorCode)) {
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         // report error
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         return;
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     }
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode);
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     if(U_FAILURE(errorCode)) {
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         // report error
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     } else {
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         // use iter to iterate over the canonically ordered
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         // version of srcIter's text
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         uint32_t state;
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         ...
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         state=uiter_getState(iter);
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         if(state!=UITER_NO_STATE) {
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             // use valid state, store it, use iter some more
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             ...
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             // later restore iter to the saved state:
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             uiter_setState(iter, state, &errorCode);
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             ...
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         }
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         ...
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     }
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *     unorm_closeIter(uni);
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * }
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \endcode
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * See also the ICU test suites.
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct UNormIterator;
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct UNormIterator UNormIterator;
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Size, in bytes, of a stack buffer to hold a UNormIterator; see the stackMem
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and stackMemSize parameters of unorm_openIter().
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UNORM_ITER_SIZE 1024
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Open a normalizing iterator. Must be closed later with unorm_closeIter().
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Use unorm_setIter() to attach a source iterator and a normalization mode.
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param stackMem Pointer to preallocated (stack-allocated) buffer to hold
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                 the UNormIterator if possible; can be NULL.
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param stackMemSize Number of bytes at stackMem; can be 0,
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                     or should be >= UNORM_ITER_SIZE for a non-NULL stackMem.
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pErrorCode ICU error code
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return an allocated and pre-initialized UNormIterator
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UNormIterator * U_EXPORT2
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruunorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode);
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Close a normalizing iterator; do not use its unorm_setIter() interface pointer afterwards.
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param uni UNormIterator from unorm_openIter()
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruunorm_closeIter(UNormIterator *uni);
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set a UCharIterator and a normalization mode for the normalizing iterator
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to wrap. The normalizing iterator will read from the character iterator,
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * normalize the text, and in turn deliver it with its own wrapper UCharIterator
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * interface which it returns.
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The source iterator remains at its current position through the unorm_setIter()
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * call but will be used and moved as soon as the
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returned normalizing iterator is.
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The returned interface pointer is valid for as long as the normalizing iterator
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is open and until another unorm_setIter() call is made on it.
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The normalizing iterator's UCharIterator interface has the following properties:
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - getState() will return UITER_NO_STATE for unknown states for positions
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *              that are not at normalization boundaries
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param uni UNormIterator from unorm_openIter()
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param iter The source text UCharIterator to be wrapped. It is aliased into the normalizing iterator.
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             Must support getState() and setState().
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param mode The normalization mode.
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pErrorCode ICU error code
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return an alias to the normalizing iterator's UCharIterator interface
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UCharIterator * U_EXPORT2
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruunorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode);
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* uconfig.h switches */
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
149