1/*
2*******************************************************************************
3*
4*   Copyright (C) 2003, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  unorm_it.h
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2003jan21
14*   created by: Markus W. Scherer
15*/
16
17#ifndef __UNORM_IT_H__
18#define __UNORM_IT_H__
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
23
24#include "unicode/uiter.h"
25#include "unicode/unorm.h"
26
27/**
 * Normalizing UCharIterator wrapper.
 * This internal API basically duplicates the functionality of the C++ Normalizer,
 * but
 * - it actually implements a character iterator (UCharIterator)
 *   with few restrictions (see unorm_setIter())
 * - it supports UCharIterator getState()/setState()
 * - it uses lower-level APIs and buffers more text and states,
 *   hopefully resulting in higher performance
 *
 * Only a forward declaration of the struct is visible in this header, so a
 * UNormIterator is handled through the pointer returned by unorm_openIter().
 *
 * Usage example:
 * \code
 * function(UCharIterator *srcIter) {
 *     UNormIterator *uni;
 *     UCharIterator *iter;
 *     UErrorCode errorCode;
 *
 *     errorCode=U_ZERO_ERROR;
 *     // no caller-provided buffer: stackMem may be NULL and stackMemSize 0
 *     uni=unorm_openIter(NULL, 0, &errorCode);
 *     if(U_FAILURE(errorCode)) {
 *         // report error
 *         return;
 *     }
 *
 *     iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode);
 *     if(U_FAILURE(errorCode)) {
 *         // report error
 *     } else {
 *         // use iter to iterate over the canonically ordered
 *         // version of srcIter's text
 *         uint32_t state;
 *
 *         ...
 *
 *         state=uiter_getState(iter);
 *         if(state!=UITER_NO_STATE) {
 *             // use valid state, store it, use iter some more
 *             ...
 *
 *             // later restore iter to the saved state:
 *             uiter_setState(iter, state, &errorCode);
 *
 *             ...
 *         }
 *
 *         ...
 *     }
 *     // iter is only an alias into uni; close uni, not iter
 *     unorm_closeIter(uni);
 * }
 * \endcode
 *
 * See also the ICU test suites.
 *
 * @internal
 */
82struct UNormIterator;
83typedef struct UNormIterator UNormIterator;
84
85/**
 * Size, in bytes, of a stack buffer to hold a UNormIterator; see the stackMem
 * and stackMemSize parameters of unorm_openIter(). A non-NULL stackMem should
 * provide at least this many bytes.
 *
 * @internal
 */
91#define UNORM_ITER_SIZE 1024
92
93/**
 * Open a normalizing iterator. It must be closed later with unorm_closeIter().
 * After opening, call unorm_setIter() to set the source UCharIterator and the
 * normalization mode, and to get the UCharIterator interface to iterate with.
 *
 * @param stackMem Pointer to preallocated (stack-allocated) buffer to hold
 *                 the UNormIterator if possible; can be NULL.
 * @param stackMemSize Number of bytes at stackMem; can be 0,
 *                     or should be >= UNORM_ITER_SIZE for a non-NULL stackMem.
 * @param pErrorCode ICU error code
 * @return an allocated and pre-initialized UNormIterator
 * @internal
 */
105U_CAPI UNormIterator * U_EXPORT2
106unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode);
107
108/**
 * Close a normalizing iterator that was opened with unorm_openIter().
 *
 * NOTE(review): the UCharIterator interface returned by unorm_setIter() is
 * documented as valid only while the normalizing iterator is open, so it
 * should presumably not be used after this call.
 *
 * @param uni UNormIterator from unorm_openIter()
 * @internal
 */
114U_CAPI void U_EXPORT2
115unorm_closeIter(UNormIterator *uni);
116
117/**
 * Set a UCharIterator and a normalization mode for the normalizing iterator
 * to wrap. The normalizing iterator will read from the character iterator,
 * normalize the text, and in turn deliver it with its own wrapper UCharIterator
 * interface which it returns.
 *
 * The source iterator remains at its current position through the unorm_setIter()
 * call but will be used and moved as soon as the returned normalizing
 * iterator is.
 *
 * The returned interface pointer is valid for as long as the normalizing iterator
 * is open and until another unorm_setIter() call is made on it.
 *
 * The normalizing iterator's UCharIterator interface has the following properties:
 * - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX
 * - getState() will return UITER_NO_STATE for unknown states for positions
 *   that are not at normalization boundaries
 *
 * @param uni UNormIterator from unorm_openIter()
 * @param iter The source text UCharIterator to be wrapped. It is aliased into
 *             the normalizing iterator.
 *             Must support getState() and setState().
 * @param mode The normalization mode.
 * @param pErrorCode ICU error code
 * @return an alias to the normalizing iterator's UCharIterator interface
 * @internal
 */
143U_CAPI UCharIterator * U_EXPORT2
144unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode);
145
146#endif /* uconfig.h switches */
147
148#endif
149