1/*
2 ******************************************************************************
3 *   Copyright (C) 1997-2008, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 ******************************************************************************
6 */
7
8/**
9 * \file
10 * \brief C++ API: Collation Element Iterator.
11 */
12
13/**
14* File coleitr.h
15*
16*
17*
18* Created by: Helena Shih
19*
20* Modification History:
21*
22*  Date       Name        Description
23*
24*  8/18/97    helena      Added internal API documentation.
25* 08/03/98    erm         Synched with 1.2 version CollationElementIterator.java
26* 12/10/99    aliu        Ported Thai collation support from Java.
27* 01/25/01    swquek      Modified into a C++ wrapper calling C APIs (ucoliter.h)
28* 02/19/01    swquek      Removed CollationElementsIterator() since it is
29*                         private constructor and no calls are made to it
30*/
31
32#ifndef COLEITR_H
33#define COLEITR_H
34
35#include "unicode/utypes.h"
36
37
38#if !UCONFIG_NO_COLLATION
39
40#include "unicode/uobject.h"
41#include "unicode/tblcoll.h"
42#include "unicode/ucoleitr.h"
43
44/**
45 * The UCollationElements struct, declared here by name only; this header
46 * does not define its members. For usage in C programs.
47 * @stable ICU 2.0
48 */
49typedef struct UCollationElements UCollationElements;
50
51U_NAMESPACE_BEGIN
52
53/**
54* The CollationElementIterator class is used as an iterator to walk through
55* each character of an international string. Use the iterator to return the
56* ordering priority of the positioned character. The ordering priority of a
57* character, which we refer to as a key, defines how a character is collated in
58* the given collation object.
59* For example, consider the following in Spanish:
60* <pre>
61*        "ca" -> the first key is key('c') and second key is key('a').
62*        "cha" -> the first key is key('ch') and second key is key('a').</pre>
63* And in German,
64* <pre> \htmlonly       "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
65*        the third key is key('b'). \endhtmlonly </pre>
66* The key of a character is an integer composed of primary order(short),
67* secondary order(char), and tertiary order(char). Java strictly defines the
68* size and signedness of its primitive data types. Therefore, the static
69* functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return
70* int32_t to ensure the correctness of the key value.
71* <p>Example of the iterator usage: (without error checking)
72* <pre>
73* \code
74*   void CollationElementIterator_Example()
75*   {
76*       UnicodeString str = "This is a test";
77*       UErrorCode success = U_ZERO_ERROR;
78*       RuleBasedCollator* rbc =
79*           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
80*       CollationElementIterator* c =
81*           rbc->createCollationElementIterator( str );
82*       int32_t order = c->next(success);
83*       c->reset();
84*       order = c->previous(success);
85*       delete c;
86*       delete rbc;
87*   }
88* \endcode
89* </pre>
90* <p>
91* CollationElementIterator::next returns the collation order of the next
92* character based on the comparison level of the collator.
93* CollationElementIterator::previous returns the collation order of the
94* previous character based on the comparison level of the collator.
95* The Collation Element Iterator moves only in one direction between calls to
96* CollationElementIterator::reset. That is, CollationElementIterator::next()
97* and CollationElementIterator::previous cannot be interleaved. Whenever
98* CollationElementIterator::previous is to be called after
99* CollationElementIterator::next() or vice versa,
100* CollationElementIterator::reset has to be called first to reset the status,
101* shifting pointers to either the end or the start of the string. Hence at the
102* next call of CollationElementIterator::previous or
103* CollationElementIterator::next(), the first or last collation order will be
104* returned.
105* If a change of direction is done without a CollationElementIterator::reset(),
106* the result is undefined.
107* The result of a forward iterate (CollationElementIterator::next) and
108* reversed result of the backward iterate (CollationElementIterator::previous)
109* on the same string are equivalent, if collation orders with the value
110* UCOL_IGNORABLE are ignored.
111* Each order is determined based on the comparison level of the collator.  A
112* collation order consists of primary order, secondary order and tertiary
113* order.  The data type of the collation order is <strong>int32_t</strong>.
114*
115* Note, CollationElementIterator should not be subclassed.
116* @see     Collator
117* @see     RuleBasedCollator
118* @version 1.8 Jan 16 2001
119*/
120class U_I18N_API CollationElementIterator : public UObject {
121public:
122
123    // CollationElementIterator public data member ------------------------------
124
125    enum {
126        /**
127         * NULLORDER indicates that an error has occurred while processing.
         * next() and previous() are also documented to return it when the end
         * or the start of the string has been reached.
128         * @stable ICU 2.0
129         */
130        NULLORDER = (int32_t)0xffffffff
131    };
132
133    // CollationElementIterator public constructor/destructor -------------------
134
135    /**
136    * Copy constructor.
137    *
138    * @param other    the object to be copied from
139    * @stable ICU 2.0
140    */
141    CollationElementIterator(const CollationElementIterator& other);
142
143    /**
144    * Destructor
145    * @stable ICU 2.0
146    */
147    virtual ~CollationElementIterator();
148
149    // CollationElementIterator public methods ----------------------------------
150
151    /**
152    * Returns true if "other" is the same as "this"
153    *
154    * @param other    the object to be compared
155    * @return         true if "other" is the same as "this"
156    * @stable ICU 2.0
157    */
158    UBool operator==(const CollationElementIterator& other) const;
159
160    /**
161    * Returns true if "other" is not the same as "this".
162    *
163    * @param other    the object to be compared
164    * @return         true if "other" is not the same as "this"
165    * @stable ICU 2.0
166    */
167    UBool operator!=(const CollationElementIterator& other) const;
168
169    /**
170    * Resets the cursor to the beginning of the string.
171    * @stable ICU 2.0
172    */
173    void reset(void);
174
175    /**
176    * Gets the ordering priority of the next character in the string.
177    * @param status the error code status.
178    * @return the next character's ordering. otherwise returns NULLORDER if an
179    *         error has occurred or if the end of string has been reached
180    * @stable ICU 2.0
181    */
182    int32_t next(UErrorCode& status);
183
184    /**
185    * Get the ordering priority of the previous collation element in the string.
186    * @param status the error code status.
187    * @return the previous element's ordering. otherwise returns NULLORDER if an
188    *         error has occurred or if the start of string has been reached
189    * @stable ICU 2.0
190    */
191    int32_t previous(UErrorCode& status);
192
193    /**
194    * Gets the primary order of a collation order.
195    * @param order the collation order
196    * @return the primary order of a collation order.
197    * @stable ICU 2.0
198    */
199    static inline int32_t primaryOrder(int32_t order);
200
201    /**
202    * Gets the secondary order of a collation order.
203    * @param order the collation order
204    * @return the secondary order of a collation order.
205    * @stable ICU 2.0
206    */
207    static inline int32_t secondaryOrder(int32_t order);
208
209    /**
210    * Gets the tertiary order of a collation order.
211    * @param order the collation order
212    * @return the tertiary order of a collation order.
213    * @stable ICU 2.0
214    */
215    static inline int32_t tertiaryOrder(int32_t order);
216
217    /**
218    * Return the maximum length of any expansion sequences that end with the
219    * specified comparison order.
220    * @param order a collation order returned by previous or next.
221    * @return maximum size of the expansion sequences ending with the collation
222    *         element or 1 if collation element does not occur at the end of any
223    *         expansion sequence
224    * @stable ICU 2.0
225    */
226    int32_t getMaxExpansion(int32_t order) const;
227
228    /**
229    * Gets the comparison order in the desired strength, ignoring the other
230    * differences.
231    * @param order The order value
    * @return the comparison order in the desired strength
232    * @stable ICU 2.0
233    */
234    int32_t strengthOrder(int32_t order) const;
235
236    /**
237    * Sets the source string.
238    * @param str the source string.
239    * @param status the error code status.
240    * @stable ICU 2.0
241    */
242    void setText(const UnicodeString& str, UErrorCode& status);
243
244    /**
245    * Sets the source text from a character iterator.
246    * @param str the source character iterator.
247    * @param status the error code status.
248    * @stable ICU 2.0
249    */
250    void setText(CharacterIterator& str, UErrorCode& status);
251
252    /**
253    * Checks if a comparison order is ignorable.
254    * @param order the collation order.
255    * @return TRUE if a character is ignorable, FALSE otherwise.
256    * @stable ICU 2.0
257    */
258    static inline UBool isIgnorable(int32_t order);
259
260    /**
261    * Gets the offset of the currently processed character in the source string.
262    * @return the offset of the character.
263    * @stable ICU 2.0
264    */
265    int32_t getOffset(void) const;
266
267    /**
268    * Sets the offset of the currently processed character in the source string.
269    * @param newOffset the new offset.
270    * @param status the error code status.
271    * This method does not return a value.
272    * @stable ICU 2.0
273    */
274    void setOffset(int32_t newOffset, UErrorCode& status);
275
276    /**
277    * ICU "poor man's RTTI", returns a UClassID for the actual class.
278    *
279    * @stable ICU 2.2
280    */
281    virtual UClassID getDynamicClassID() const;
282
283    /**
284    * ICU "poor man's RTTI", returns a UClassID for this class.
285    *
286    * @stable ICU 2.2
287    */
288    static UClassID U_EXPORT2 getStaticClassID();
289
290protected:
291
292    // CollationElementIterator protected constructors --------------------------
293    /**
    * Friend declaration: gives RuleBasedCollator access to the non-public
    * members of this class, including the protected constructors below.
294    * @stable ICU 2.0
295    */
296    friend class RuleBasedCollator;
297
298    /**
299    * CollationElementIterator constructor. This takes the source string and the
300    * collation object. The cursor will walk thru the source string based on the
301    * predefined collation rules. If the source string is empty, NULLORDER will
302    * be returned on the calls to next().
303    * @param sourceText    the source string.
304    * @param order         the collation object.
305    * @param status        the error code status.
306    * @stable ICU 2.0
307    */
308    CollationElementIterator(const UnicodeString& sourceText,
309        const RuleBasedCollator* order, UErrorCode& status);
310
311    /**
312    * CollationElementIterator constructor. This takes the source character
313    * iterator and the collation object.  The cursor will walk thru the source
314    * text based on the predefined collation rules.  If the source text is
315    * empty, NULLORDER will be returned on the calls to next().
316    * @param sourceText    the source character iterator.
317    * @param order         the collation object.
318    * @param status        the error code status.
319    * @stable ICU 2.0
320    */
321    CollationElementIterator(const CharacterIterator& sourceText,
322        const RuleBasedCollator* order, UErrorCode& status);
323
324    // CollationElementIterator protected methods -------------------------------
325
326    /**
327    * Assignment operator
328    *
329    * @param other    the object to be copied
    * @return         a const reference to a CollationElementIterator
    *                 (presumably this object; the implementation is not
    *                 visible in this header)
330    * @stable ICU 2.0
331    */
332    const CollationElementIterator&
333        operator=(const CollationElementIterator& other);
334
335private:
336    CollationElementIterator(); // default constructor not implemented
337
338    // CollationElementIterator private data members ----------------------------
339
340    /**
341    * Data wrapper for collation elements
342    */
343    UCollationElements *m_data_;
344
345    /**
346    * Indicates if m_data_ belongs to this object.
347    */
348    UBool isDataOwned_;
349
350};
351
352// CollationElementIterator inline method definitions -------------------------
353
354/**
355* Get the primary order of a collation order.
356* @param order the collation order
357* @return the primary order of a collation order.
358*/
359inline int32_t CollationElementIterator::primaryOrder(int32_t order)
360{
361    order &= RuleBasedCollator::PRIMARYORDERMASK;
362    return (order >> RuleBasedCollator::PRIMARYORDERSHIFT);
363}
364
365/**
366* Get the secondary order of a collation order.
367* @param order the collation order
368* @return the secondary order of a collation order.
369*/
370inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
371{
372    order = order & RuleBasedCollator::SECONDARYORDERMASK;
373    return (order >> RuleBasedCollator::SECONDARYORDERSHIFT);
374}
375
376/**
377* Get the tertiary order of a collation order.
378* @param order the collation order
379* @return the tertiary order of a collation order.
380*/
381inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
382{
383    return (order &= RuleBasedCollator::TERTIARYORDERMASK);
384}
385
386inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const
387{
388    return ucol_getMaxExpansion(m_data_, (uint32_t)order);
389}
390
391inline UBool CollationElementIterator::isIgnorable(int32_t order)
392{
393    return (primaryOrder(order) == RuleBasedCollator::PRIMIGNORABLE);
394}
395
396U_NAMESPACE_END
397
398#endif /* #if !UCONFIG_NO_COLLATION */
399
400#endif
401