1/*
2******************************************************************************
3* Copyright (C) 1996-2011, International Business Machines Corporation and
4* others. All Rights Reserved.
5******************************************************************************
6*/
7
8/**
9 * \file
10 * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
11 */
12
13/**
14* File tblcoll.h
15*
16* Created by: Helena Shih
17*
18* Modification History:
19*
20*  Date        Name        Description
21*  2/5/97      aliu        Added streamIn and streamOut methods.  Added
22*                          constructor which reads RuleBasedCollator object from
23*                          a binary file.  Added writeToFile method which streams
24*                          RuleBasedCollator out to a binary file.  The streamIn
25*                          and streamOut methods use istream and ostream objects
26*                          in binary mode.
27*  2/12/97     aliu        Modified to use TableCollationData sub-object to
28*                          hold invariant data.
29*  2/13/97     aliu        Moved several methods into this class from Collation.
30*                          Added a private RuleBasedCollator(Locale&) constructor,
31*                          to be used by Collator::createDefault().  General
32*                          clean up.
33*  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
34*                          constructor and getDynamicClassID.
35*  3/5/97      aliu        Modified constructFromFile() to add parameter
36*                          specifying whether or not binary loading is to be
37*                          attempted.  This is required for dynamic rule loading.
38* 05/07/97     helena      Added memory allocation error detection.
39*  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
40*                          use MergeCollation::getPattern.
41*  6/20/97     helena      Java class name change.
42*  8/18/97     helena      Added internal API documentation.
43* 09/03/97     helena      Added createCollationKeyValues().
44* 02/10/98     damiba      Added compare with "length" parameter
45* 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
46* 04/23/99     stephen     Removed EDecompositionMode, merged with
47*                          Normalizer::EMode
48* 06/14/99     stephen     Removed kResourceBundleSuffix
49* 11/02/99     helena      Collator performance enhancements.  Eliminates the
50*                          UnicodeString construction and special case for NO_OP.
51* 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
52*                          internal state management.
53* 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
54*                          to implementation file.
55* 01/29/01     synwee      Modified into a C++ wrapper which calls C API
56*                          (ucol.h)
57*/
58
59#ifndef TBLCOLL_H
60#define TBLCOLL_H
61
62#include "unicode/utypes.h"
63
64
65#if !UCONFIG_NO_COLLATION
66
67#include "unicode/coll.h"
68#include "unicode/ucol.h"
69#include "unicode/sortkey.h"
70#include "unicode/normlzr.h"
71
72U_NAMESPACE_BEGIN
73
74/** Forward declaration of StringSearch.
75* @stable ICU 2.0
76*/
77class StringSearch;
78/** Forward declaration of CollationElementIterator.
79* @stable ICU 2.0
80*/
81class CollationElementIterator;
82
83/**
84 * The RuleBasedCollator class provides the simple implementation of
85 * Collator, using data-driven tables. The user can create a customized
86 * table-based collation.
87 * <P>
88 * <em>Important: </em>The ICU collation service has been reimplemented
89 * in order to achieve better performance and UCA compliance.
90 * For details, see the
91 * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
92 * collation design document</a>.
93 * <p>
94 * RuleBasedCollator is a thin C++ wrapper over the C implementation.
95 * <p>
96 * For more information about the collation service see
97 * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
98 * <p>
99 * Collation service provides correct sorting orders for most locales supported in ICU.
100 * If specific data for a locale is not available, the order eventually falls back
101 * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>.
102 * <p>
103 * Sort ordering may be customized by providing your own set of rules. For more on
104 * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
105 * Collation customization</a> section of the users guide.
106 * <p>
107 * Note, RuleBasedCollator is not to be subclassed.
108 * @see        Collator
109 * @version    2.0 11/15/2001
110 */
111class U_I18N_API RuleBasedCollator : public Collator
112{
113public:
114
115  // constructor -------------------------------------------------------------
116
117    /**
118     * Constructs a RuleBasedCollator from a rule string. The table rules are
119     * used to build a collation table. Please see the RuleBasedCollator class
120     * description for more details on the collation rule syntax.
121     * @param rules the collation rules to build the collation table from.
122     * @param status reports whether construction succeeded or failed.
123     * @see Locale
124     * @stable ICU 2.0
125     */
126    RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
127
128    /**
129     * Constructs a RuleBasedCollator from a rule string and a default strength.
130     * The table rules are used to build a collation table. Please see the
131     * RuleBasedCollator class description for more details on the rule syntax.
132     * @param rules the collation rules to build the collation table from.
133     * @param collationStrength default strength for comparison
134     * @param status reports whether construction succeeded or failed.
135     * @see Locale
136     * @stable ICU 2.0
137     */
138    RuleBasedCollator(const UnicodeString& rules,
139                       ECollationStrength collationStrength,
140                       UErrorCode& status);
141
142    /**
143     * Constructs a RuleBasedCollator from a rule string and a decomposition
144     * mode. The table rules are used to build a collation table. Please see the
145     * RuleBasedCollator class description for more details on the rule syntax.
146     * @param rules the collation rules to build the collation table from.
147     * @param decompositionMode the normalisation mode
148     * @param status reports whether construction succeeded or failed.
149     * @see Locale
150     * @stable ICU 2.0
151     */
152    RuleBasedCollator(const UnicodeString& rules,
153                    UColAttributeValue decompositionMode,
154                    UErrorCode& status);
155
156    /**
157     * Constructs a RuleBasedCollator from a rule string, a default strength and
158     * a decomposition mode. The table rules are used to build a collation table.
159     * Please see the RuleBasedCollator class description for the rule syntax.
160     * @param rules the collation rules to build the collation table from.
161     * @param collationStrength default strength for comparison
162     * @param decompositionMode the normalisation mode
163     * @param status reports whether construction succeeded or failed.
164     * @see Locale
165     * @stable ICU 2.0
166     */
167    RuleBasedCollator(const UnicodeString& rules,
168                    ECollationStrength collationStrength,
169                    UColAttributeValue decompositionMode,
170                    UErrorCode& status);
171
172    /**
173     * Copy constructor. Initializes this collator from another RuleBasedCollator.
174     * @param other the RuleBasedCollator object to be copied
175     * @see Locale
176     * @stable ICU 2.0
177     */
178    RuleBasedCollator(const RuleBasedCollator& other);
179
180
181    /** Opens a collator from a collator binary image created using
182    *  cloneBinary. The binary image used in instantiation of the
183    *  collator remains owned by the user and should stay around for
184    *  the lifetime of the collator. The API also takes a base collator
185    *  which usually should be UCA.
186    *  @param bin binary image owned by the user and required through the
187    *             lifetime of the collator
188    *  @param length size of the image. If negative, the API will try to
189    *                figure out the length of the image
190    *  @param base fallback collator, usually UCA. Base is required to be
191    *              present through the lifetime of the collator. Currently
192    *              it cannot be NULL.
193    *  @param status for catching errors
194    *  @note this is a constructor: the newly created collator is this object
195    *  @see cloneBinary
196    *  @stable ICU 3.4
197    */
198    RuleBasedCollator(const uint8_t *bin, int32_t length,
199                    const RuleBasedCollator *base,
200                    UErrorCode &status);
201    // destructor --------------------------------------------------------------
202
203    /**
204     * Virtual destructor.
205     * @stable ICU 2.0
206     */
207    virtual ~RuleBasedCollator();
208
209    // public methods ----------------------------------------------------------
210
211    /**
212     * Assignment operator. Assigns the state of other to this collator.
213     * @param other the RuleBasedCollator object to be assigned from.
214     * @stable ICU 2.0
215     */
216    RuleBasedCollator& operator=(const RuleBasedCollator& other);
217
218    /**
219     * Returns true if the argument is the same as this object.
220     * @param other Collator object to be compared.
221     * @return true if the argument is the same as this object.
222     * @stable ICU 2.0
223     */
224    virtual UBool operator==(const Collator& other) const;
225
226    /**
227     * Returns true if the argument is not the same as this object.
228     * @param other Collator object to be compared.
229     * @return true if the argument is not the same as this object.
230     * @stable ICU 2.0
231     */
232    virtual UBool operator!=(const Collator& other) const;
233
234    /**
235     * Makes a deep copy of the object.
236     * The caller owns the returned object.
237     * @return the cloned object, as a Collator pointer.
238     * @stable ICU 2.0
239     */
240    virtual Collator* clone(void) const;
241
242    /**
243     * Creates a collation element iterator for the source string. The caller of
244     * this method is responsible for the memory management of the returned
245     * pointer.
246     * @param source the string over which the CollationElementIterator will
247     *        iterate.
248     * @return the collation element iterator of the source string, using this
249     *         collator as its base Collator.
250     * @stable ICU 2.2
251     */
252    virtual CollationElementIterator* createCollationElementIterator(
253                                           const UnicodeString& source) const;
254
255    /**
256     * Creates a collation element iterator for the source. The caller of this
257     * method is responsible for the memory management of the returned pointer.
258     * @param source the CharacterIterator which produces the characters over
259     *        which the CollationElementIterator will iterate.
260     * @return the collation element iterator of the source, using this
261     *         collator as its base Collator.
262     * @stable ICU 2.2
263     */
264    virtual CollationElementIterator* createCollationElementIterator(
265                                         const CharacterIterator& source) const;
266
267    /**
268     * Compares a range of character data stored in two different strings based
269     * on the collation rules. Returns information about whether a string is
270     * less than, greater than or equal to another string in a language.
271     * This can be overridden in a subclass.
272     * @param source the source string.
273     * @param target the target string to be compared with the source string.
274     * @return the comparison result. GREATER if the source string is greater
275     *         than the target string, LESS if the source is less than the
276     *         target. Otherwise, returns EQUAL.
277     * @deprecated ICU 2.6 Use overload with UErrorCode&
278     */
279    virtual EComparisonResult compare(const UnicodeString& source,
280                                      const UnicodeString& target) const;
281
282
283    /**
284    * The comparison function compares the character data stored in two
285    * different strings. Returns information about whether a string is less
286    * than, greater than or equal to another string.
287    * @param source the source string to be compared with.
288    * @param target the string that is to be compared with the source string.
289    * @param status possible error code
290    * @return an enum value: UCOL_GREATER if source is greater than target;
291    * UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
292    * than target.
293    * @stable ICU 2.6
294    **/
295    virtual UCollationResult compare(const UnicodeString& source,
296                                      const UnicodeString& target,
297                                      UErrorCode &status) const;
298
299    /**
300     * Compares a range of character data stored in two different strings based
301     * on the collation rules up to the specified length. Returns information
302     * about whether a string is less than, greater than or equal to another
303     * string in a language. This can be overridden in a subclass.
304     * @param source the source string.
305     * @param target the target string to be compared with the source string.
306     * @param length compares up to the specified length
307     * @return the comparison result. GREATER if the source string is greater
308     *         than the target string, LESS if the source is less than the
309     *         target. Otherwise, returns EQUAL.
310     * @deprecated ICU 2.6 Use overload with UErrorCode&
311     */
312    virtual EComparisonResult compare(const UnicodeString& source,
313                                      const UnicodeString&  target,
314                                      int32_t length) const;
315
316    /**
317    * Does the same thing as compare but limits the comparison to a specified
318    * length.
319    * @param source the source string to be compared with.
320    * @param target the string that is to be compared with the source string.
321    * @param length the length the comparison is limited to
322    * @param status possible error code
323    * @return an enum value: UCOL_GREATER if source (up to the specified
324    *         length) is greater than target; UCOL_EQUAL if source (up to specified
325    *         length) is equal to target; UCOL_LESS if source (up to the specified
326    *         length) is less than target.
327    * @stable ICU 2.6
328    */
329    virtual UCollationResult compare(const UnicodeString& source,
330                                      const UnicodeString& target,
331                                      int32_t length,
332                                      UErrorCode &status) const;
333
334    /**
335     * The comparison function compares the character data stored in two
336     * different string arrays. Returns information about whether a string array
337     * is less than, greater than or equal to another string array.
338     * <p>Example of use:
339     * <pre>
340     * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
341     * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
342     * .       UErrorCode status = U_ZERO_ERROR;
343     * .       Collator *myCollation =
344     * .                         Collator::createInstance(Locale::US, status);
345     * .       if (U_FAILURE(status)) return;
346     * .       myCollation->setStrength(Collator::PRIMARY);
347     * .       // result would be Collator::EQUAL ("abc" == "ABC")
348     * .       // (no primary difference between "abc" and "ABC")
349     * .       Collator::EComparisonResult result =
350     * .                             myCollation->compare(abc, 3, ABC, 3);
351     * .       myCollation->setStrength(Collator::TERTIARY);
352     * .       // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
353     * .       // (with tertiary difference between "abc" and "ABC")
354     * .       result =  myCollation->compare(abc, 3, ABC, 3);
355     * </pre>
356     * @param source the source string array to be compared with.
357     * @param sourceLength the length of the source string array. If this value
358     *        is equal to -1, the string array is null-terminated.
359     * @param target the string that is to be compared with the source string.
360     * @param targetLength the length of the target string array. If this value
361     *        is equal to -1, the string array is null-terminated.
362     * @return the comparison result. GREATER if source is greater than target;
363     *         EQUAL if source is equal to target; LESS if source is less than
364     *         target
365     * @deprecated ICU 2.6 Use overload with UErrorCode&
366     */
367    virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
368                                      const UChar* target, int32_t targetLength)
369                                      const;
370
371    /**
372    * The comparison function compares the character data stored in two
373    * different string arrays. Returns information about whether a string array
374    * is less than, greater than or equal to another string array.
375    * @param source the source string array to be compared with.
376    * @param sourceLength the length of the source string array.  If this value
377    *        is equal to -1, the string array is null-terminated.
378    * @param target the string that is to be compared with the source string.
379    * @param targetLength the length of the target string array.  If this value
380    *        is equal to -1, the string array is null-terminated.
381    * @param status possible error code
382    * @return an enum value: UCOL_GREATER if source is greater than target;
383    * UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
384    * than target.
385    * @stable ICU 2.6
386    */
387    virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
388                                      const UChar* target, int32_t targetLength,
389                                      UErrorCode &status) const;
390
391    /**
392     * Compares two strings using the Collator.
393     * Returns whether the first one compares less than/equal to/greater than
394     * the second one.
395     * This version takes UCharIterator input.
396     * @param sIter the first ("source") string iterator
397     * @param tIter the second ("target") string iterator
398     * @param status ICU status
399     * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER, for the first string
400     *         relative to the second
401     * @stable ICU 4.2
402    virtual UCollationResult compare(UCharIterator &sIter,
403                                     UCharIterator &tIter,
404                                     UErrorCode &status) const;
405
406    /**
407    * Transforms a specified region of the string into a series of characters
408    * that can be compared with CollationKey.compare. Use a CollationKey when
409    * you need to do repeated comparisons on the same string. For a single
410    * comparison the compare method will be faster.
411    * @param source the source string.
412    * @param key the transformed key of the source string.
413    * @param status the error code status.
414    * @return the transformed key.
415    * @see CollationKey
416    * @deprecated ICU 2.8 Use getSortKey(...) instead
417    */
418    virtual CollationKey& getCollationKey(const UnicodeString& source,
419                                          CollationKey& key,
420                                          UErrorCode& status) const;
421
422    /**
423    * Transforms a specified region of the string into a series of characters
424    * that can be compared with CollationKey.compare. Use a CollationKey when
425    * you need to do repeated comparisons on the same string. For a single
426    * comparison the compare method will be faster.
427    * @param source the source string.
428    * @param sourceLength the length of the source string.
429    * @param key the transformed key of the source string.
430    * @param status the error code status.
431    * @return the transformed key.
432    * @see CollationKey
433    * @deprecated ICU 2.8 Use getSortKey(...) instead
434    */
435    virtual CollationKey& getCollationKey(const UChar *source,
436                                          int32_t sourceLength,
437                                          CollationKey& key,
438                                          UErrorCode& status) const;
439
440    /**
441     * Generates the hash code for this rule-based collation object.
442     * @return the hash code, as a 32-bit signed integer.
443     * @stable ICU 2.0
444     */
445    virtual int32_t hashCode(void) const;
446
447    /**
448    * Gets the locale of the Collator.
449    * @param type can be either requested, valid or actual locale. For more
450    *             information see the definition of ULocDataLocaleType in
451    *             uloc.h
452    * @param status the error code status.
453    * @return the locale (returned by value) where the collation data lives.
454    *         If the collator was instantiated from rules, the locale is empty.
455    * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
456    */
457    virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
458
459    /**
460     * Gets the table-based rules for the collation object.
461     * @return a reference to the collation rules that the table collation
462     *         object was created from.
463     * @stable ICU 2.0
464     */
465    const UnicodeString& getRules(void) const;
466
467    /**
468     * Gets the version information for a Collator.
469     * @param info output parameter: the version information is filled in here
470     * @stable ICU 2.0
471     */
472    virtual void getVersion(UVersionInfo info) const;
473
474    /**
475     * Returns the maximum length of any expansion sequence that ends with the
476     * specified comparison order.
477     * @param order a collation order returned by previous or next.
478     * @return maximum size of the expansion sequences ending with the collation
479     *         element, or 1 if the collation element does not occur at the end
480     *         of any expansion sequence
481     * @see CollationElementIterator#getMaxExpansion
482     * @stable ICU 2.0
483     */
484    int32_t getMaxExpansion(int32_t order) const;
485
486    /**
487     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
488     * method implements a simple version of RTTI, since not all C++
489     * compilers support genuine RTTI. Polymorphic operator==() and clone()
490     * methods call this method.
491     * @return The class ID for this object. All objects of a given class have
492     *         the same class ID. Objects of other classes have different class
493     *         IDs.
494     * @stable ICU 2.0
495     */
496    virtual UClassID getDynamicClassID(void) const;
497
498    /**
499     * Returns the class ID for this class. This is useful only for comparing
500     * against a return value from getDynamicClassID(). For example:
501     * <pre>
502     * Base* polymorphic_pointer = createPolymorphicObject();
503     * if (polymorphic_pointer->getDynamicClassID() ==
504     *                                          Derived::getStaticClassID()) ...
505     * </pre>
506     * @return The class ID for all objects of this class.
507     * @stable ICU 2.0
508     */
509    static UClassID U_EXPORT2 getStaticClassID(void);
510
511    /**
512     * Returns the binary format of the class's rules. The format is that of
513     * .col files.
514     * @param length output parameter: receives the length of the data, in bytes
515     * @param status the error code status.
516     * @return memory, owned by the caller, of size 'length' bytes.
517     * @stable ICU 2.2
518     */
519    uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
520
521
522    /** Creates a binary image of a collator. This binary image can be stored and
523    *  later used to instantiate a collator using ucol_openBinary or the
524    *  RuleBasedCollator constructor taking a binary image. This API supports preflighting.
525    *  @param buffer a fill-in buffer to receive the binary image
526    *  @param capacity capacity of the destination buffer
527    *  @param status for catching errors
528    *  @return size of the image
529    *  @see ucol_openBinary
530    *  @stable ICU 3.4
531    */
532    int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
533
534    /**
535     * Returns the current rules. Delta defines whether the full rules are
536     * returned or just the tailoring.
537     * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
538     * @param buffer UnicodeString that receives the resulting rules
539     * @stable ICU 2.2
540     */
541    void getRules(UColRuleOption delta, UnicodeString &buffer);
542
543    /**
544     * Universal attribute setter.
545     * @param attr attribute type
546     * @param value attribute value
547     * @param status to indicate whether the operation succeeded or there were errors
548     * @stable ICU 2.2
549     */
550    virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
551                              UErrorCode &status);
552
553    /**
554     * Universal attribute getter.
555     * @param attr attribute type
556     * @param status to indicate whether the operation succeeded or there were errors
557     * @return attribute value
558     * @stable ICU 2.2
559     */
560    virtual UColAttributeValue getAttribute(UColAttribute attr,
561                                            UErrorCode &status);
562
563    /**
564     * Sets the variable top to a collation element value of a string supplied.
565     * @param varTop one or more (if contraction) UChars to which the variable top should be set
566     * @param len length of variable top string. If -1 it is considered to be zero terminated.
567     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
568     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
569     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
570     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
571     * @stable ICU 2.0
572     */
573    virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
574
575    /**
576     * Sets the variable top to a collation element value of a string supplied.
577     * @param varTop a UnicodeString of size 1 or more (if contraction) of UChars to which the variable top should be set
578     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
579     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
580     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
581     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
582     * @stable ICU 2.0
583     */
584    virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status);
585
586    /**
587     * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
588     * Lower 16 bits are ignored.
589     * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
590     * @param status error code (not changed by function)
591     * @stable ICU 2.0
592     */
593    virtual void setVariableTop(const uint32_t varTop, UErrorCode &status);
594
595    /**
596     * Gets the variable top value of a Collator.
597     * @param status error code (not changed by function). If error code is set, the return value is undefined.
598     * @return the variable top value. Lower 16 bits are undefined and should be ignored.
599     * @stable ICU 2.0
600     */
601    virtual uint32_t getVariableTop(UErrorCode &status) const;
602
603    /**
604     * Gets a UnicodeSet that contains all the characters and sequences tailored in
605     * this collator.
606     * @param status      error code of the operation
607     * @return a pointer to a UnicodeSet object containing all the
608     *         code points and sequences that may sort differently than
609     *         in the UCA. The object must be disposed of by using delete
610     * @stable ICU 2.4
611     */
612    virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
613
614    /**
615     * Thread safe cloning operation.
616     * @return pointer to the new clone; the caller is responsible for removing it.
617     * @stable ICU 2.2
618     */
619    virtual Collator* safeClone(void);
620
621    /**
622     * Gets the sort key as an array of bytes from a UnicodeString.
623     * @param source string to be processed.
624     * @param result buffer to store result in. If NULL, number of bytes needed
625     *        will be returned.
626     * @param resultLength length of the result buffer. If it is not large
627     *        enough, the buffer will be filled to capacity.
628     * @return Number of bytes needed for storing the sort key
629     * @stable ICU 2.0
630     */
631    virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
632                               int32_t resultLength) const;
633
634    /**
635     * Gets the sort key as an array of bytes from a UChar buffer.
636     * @param source string to be processed.
637     * @param sourceLength length of string to be processed. If -1, the string
638     *        is 0 terminated and length will be decided by the function.
639     * @param result buffer to store result in. If NULL, number of bytes needed
640     *        will be returned.
641     * @param resultLength length of the result buffer. If it is not large
642     *        enough, the buffer will be filled to capacity.
643     * @return Number of bytes needed for storing the sort key
644     * @stable ICU 2.2
645     */
646    virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
647                               uint8_t *result, int32_t resultLength) const;
648
649    /**
650    * Determines the minimum strength that will be used in comparison or
651    * transformation.
652    * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
653    * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
654    * are ignored.
655    * @return the current comparison level.
656    * @see RuleBasedCollator#setStrength
657    * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
658    */
659    virtual ECollationStrength getStrength(void) const;
660
661    /**
662    * Sets the minimum strength to be used in comparison or transformation.
663    * @see RuleBasedCollator#getStrength
664    * @param newStrength the new comparison level to use.
665    * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
666    */
667    virtual void setStrength(ECollationStrength newStrength);
668
669    /**
670     * Retrieves the reordering codes for this collator.
671     * @param dest The array to fill with the script ordering.
672     * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
673     *  will only return the length of the result without writing any of the result (pre-flighting).
674     * @param status A reference to an error code value, which must not indicate
675     * a failure before the function call.
676     * @return The length of the script ordering array.
677     * @see ucol_setReorderCodes
678     * @see Collator#getEquivalentReorderCodes
679     * @see Collator#setReorderCodes
680     * @draft ICU 4.8
681     */
682     virtual int32_t U_EXPORT2 getReorderCodes(int32_t *dest,
683                                    int32_t destCapacity,
684                                    UErrorCode& status) const;
685
686    /**
687     * Sets the ordering of scripts for this collator.
688     * @param reorderCodes An array of script codes in the new order. This may be NULL if
689     * reorderCodesLength is 0. An empty array will clear any reordering codes on the collator.
690     * @param reorderCodesLength The number of entries in reorderCodes.
691     * @param status error code
692     * @see Collator#getReorderCodes
693     * @see Collator#getEquivalentReorderCodes
694     * @draft ICU 4.8
695     */
696     virtual void U_EXPORT2 setReorderCodes(const int32_t* reorderCodes,
697                                int32_t reorderCodesLength,
698                                UErrorCode& status) ;
699
700    /**
701     * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
702     * codes will be grouped and must reorder together.
703     * @param reorderCode The reorder code to determine equivalence for.
704     * @param dest The array to fill with the equivalent reordering codes.
705     * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the
706     * function will only return the length of the result without writing any of the
707     * reorder codes (pre-flighting).
708     * @param status A reference to an error code value, which must not indicate
709     * a failure before the function call.
710     * @return The length of the reordering code equivalence array.
711     * @see ucol_setReorderCodes
712     * @see Collator#getReorderCodes
713     * @see Collator#setReorderCodes
714     * @draft ICU 4.8
715     */
716    static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
717                                int32_t* dest,
718                                int32_t destCapacity,
719                                UErrorCode& status);
720
721
722private:
723
724    // private static constants -----------------------------------------------
725    // Unnamed enum of index markers, order increments, masks, shifts and reset values.
726    enum {
727        /* needs a look-up in .commit() */
728        CHARINDEX = 0x70000000,
729        /* expand index follows */
730        EXPANDCHARINDEX = 0x7E000000,
731        /* contract indexes follow */
732        CONTRACTCHARINDEX = 0x7F000000,
733        /* unmapped character values */
734        UNMAPPED = 0xFFFFFFFF,
735        /* primary strength increment */
736        PRIMARYORDERINCREMENT = 0x00010000,
737        /* secondary strength increment */
738        SECONDARYORDERINCREMENT = 0x00000100,
739        /* tertiary strength increment */
740        TERTIARYORDERINCREMENT = 0x00000001,
741        /* mask off anything but primary order */
742        PRIMARYORDERMASK = 0xffff0000,
743        /* mask off anything but secondary order */
744        SECONDARYORDERMASK = 0x0000ff00,
745        /* mask off anything but tertiary order */
746        TERTIARYORDERMASK = 0x000000ff,
747        /* mask off ignorable char order */
748        IGNORABLEMASK = 0x0000ffff,
749        /* use only the primary difference */
750        PRIMARYDIFFERENCEONLY = 0xffff0000,
751        /* use only the primary and secondary difference */
752        SECONDARYDIFFERENCEONLY = 0xffffff00,
753        /* primary order shift */
754        PRIMARYORDERSHIFT = 16,
755        /* secondary order shift */
756        SECONDARYORDERSHIFT = 8,
757        /* starting value for collation elements */
758        COLELEMENTSTART = 0x02020202,
759        /* testing mask for primary low element */
760        PRIMARYLOWZEROMASK = 0x00FF0000,
761        /* resetting value for secondaries and tertiaries */
762        RESETSECONDARYTERTIARY = 0x00000202,
763        /* resetting value for tertiaries */
764        RESETTERTIARY = 0x00000002,
765        /* NOTE(review): undocumented; presumably a primary-ignorable marker (same value as RESETSECONDARYTERTIARY) - confirm */
766        PRIMIGNORABLE = 0x0202
767    };
768
769    // private data members ---------------------------------------------------
770    // When set, setUCollator(UCollator*) calls ucol_close() on the current ucollator before replacing it.
771    UBool dataIsOwned;
772    // Set to TRUE by setUCollator(UCollator*); per the checkOwned() comment, such aliases are not copied.
773    UBool isWriteThroughAlias;
774
775    /**
776    * c struct for collation. All initialisation for it has to be done through
777    * setUCollator().
778    */
779    UCollator *ucollator;
780
781    /**
782    * Rule string (UnicodeString); initialised by setRuleStringFromCollator().
783    */
784    UnicodeString urulestring;
785
786    // friend classes --------------------------------------------------------
787
788    /**
789    * Used to iterate over collation elements in a character source.
790    */
791    friend class CollationElementIterator;
792
793    /**
794    * Collator ONLY needs access to the private constructor
795    * RuleBasedCollator(const Locale&, UErrorCode&).
796    */
797    friend class Collator;
798
799    /**
800    * Used to search over collation elements in a character source.
801    */
802    friend class StringSearch;
803
804    // private constructors --------------------------------------------------
805
806    /**
807     * Default constructor (declared private: usable only by this class and its friends).
808     */
809    RuleBasedCollator();
810
811    /**
812     * RuleBasedCollator constructor. This constructor takes a locale. The
813     * only caller of this constructor should be Collator::createInstance(). If
814     * createInstance() happens to know that the requested locale's collation is
815     * implemented as a RuleBasedCollator, it can then call this constructor.
816     * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
817     * COLLATION TABLE. It does this by falling back to defaults.
818     * @param desiredLocale locale used
819     * @param status error code status
820     */
821    RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
822
823    /**
824     * Common constructor implementation.
825     *
826     * @param rules the collation rules to build the collation table from.
827     * @param collationStrength default strength for comparison
828     * @param decompositionMode the normalisation mode
829     * @param status reports success or an error.
830     */
831    void
832    construct(const UnicodeString& rules,
833              UColAttributeValue collationStrength,
834              UColAttributeValue decompositionMode,
835              UErrorCode& status);
836
837    // private methods -------------------------------------------------------
838
839    /**
840    * Creates the c struct for ucollator; the inline definition forwards locale.getName() to the const char* overload.
841    * @param locale desired locale
842    * @param status error status
843    */
844    void setUCollator(const Locale& locale, UErrorCode& status);
845
846    /**
847    * Creates the c struct for ucollator from a locale name.
848    * @param locale desired locale name
849    * @param status error status
850    */
851    void setUCollator(const char* locale, UErrorCode& status);
852
853    /**
854    * Replaces the c struct for ucollator. This is used internally by StringSearch.
855    * The previous ucollator is closed only if it was owned; the new one is not cleaned up by
856    * this RuleBasedCollator (dataIsOwned is set to FALSE, isWriteThroughAlias to TRUE).
857    * @param collator new ucollator data
858    */
859    void setUCollator(UCollator *collator);
860
861public:
862    /**
863    * Get UCollator data struct (the ucollator member). Used only by StringSearch & intltest.
864    * @return UCollator data struct
865    * @internal
866    */
867    const UCollator * getUCollator();
868
869protected:
870   /**
871    * Used internally by registration to define the requested, valid and actual locales.
872    * @param requestedLocale the requested locale
873    * @param validLocale the valid locale
874    * @param actualLocale the actual locale
875    * @internal
876    */
877    virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
878
879private:
880
881    // If the ucollator is neither owned nor a write-through alias, copy it.
882    void checkOwned(void);
883
884    // Utility that initialises the rule string; used by checkOwned, construct and setUCollator(UCollator*).
885    void setRuleStringFromCollator();
886
887    /**
888    * Converts C's UCollationResult to EComparisonResult
889    * @param result member of the enum UCollationResult
890    * @return EComparisonResult equivalent of UCollationResult (anything but UCOL_LESS/UCOL_EQUAL yields GREATER)
891    * @deprecated ICU 2.6. We will not need it.
892    */
893    Collator::EComparisonResult getEComparisonResult(
894                                            const UCollationResult &result) const;
895
896    /**
897    * Converts C's UCollationStrength to ECollationStrength
898    * @param strength member of the enum UCollationStrength
899    * @return ECollationStrength equivalent of UCollationStrength (IDENTICAL for any value above QUATERNARY or unrecognised)
900    */
901    Collator::ECollationStrength getECollationStrength(
902                                        const UCollationStrength &strength) const;
903
904    /**
905    * Converts C++'s ECollationStrength to UCollationStrength
906    * @param strength member of the enum ECollationStrength
907    * @return UCollationStrength equivalent of ECollationStrength (UCOL_IDENTICAL for any value above QUATERNARY or unrecognised)
908    */
909    UCollationStrength getUCollationStrength(
910      const Collator::ECollationStrength &strength) const;
911};
912
913// inline method implementation ---------------------------------------------
914
915inline void RuleBasedCollator::setUCollator(const Locale &locale,
916                                               UErrorCode &status)
917{
918    setUCollator(locale.getName(), status);
919}
920
921
922inline void RuleBasedCollator::setUCollator(UCollator     *collator)
923{
924
925    if (ucollator && dataIsOwned) {
926        ucol_close(ucollator);
927    }
928    ucollator   = collator;
929    dataIsOwned = FALSE;
930    isWriteThroughAlias = TRUE;
931    setRuleStringFromCollator();
932}
933
934inline const UCollator * RuleBasedCollator::getUCollator()
935{
936    return ucollator;
937}
938
939inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult(
940                                           const UCollationResult &result) const
941{
942    switch (result)
943    {
944    case UCOL_LESS :
945        return Collator::LESS;
946    case UCOL_EQUAL :
947        return Collator::EQUAL;
948    default :
949        return Collator::GREATER;
950    }
951}
952
953inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength(
954                                       const UCollationStrength &strength) const
955{
956    switch (strength)
957    {
958    case UCOL_PRIMARY :
959        return Collator::PRIMARY;
960    case UCOL_SECONDARY :
961        return Collator::SECONDARY;
962    case UCOL_TERTIARY :
963        return Collator::TERTIARY;
964    case UCOL_QUATERNARY :
965        return Collator::QUATERNARY;
966    default :
967        return Collator::IDENTICAL;
968    }
969}
970
971inline UCollationStrength RuleBasedCollator::getUCollationStrength(
972                             const Collator::ECollationStrength &strength) const
973{
974    switch (strength)
975    {
976    case Collator::PRIMARY :
977        return UCOL_PRIMARY;
978    case Collator::SECONDARY :
979        return UCOL_SECONDARY;
980    case Collator::TERTIARY :
981        return UCOL_TERTIARY;
982    case Collator::QUATERNARY :
983        return UCOL_QUATERNARY;
984    default :
985        return UCOL_IDENTICAL;
986    }
987}
988
989U_NAMESPACE_END
990
991#endif /* #if !UCONFIG_NO_COLLATION */
992
993#endif
994