164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ****************************************************************************** 5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Copyright (C) 1997-2014, International Business Machines 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Corporation and others. All Rights Reserved. 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ****************************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 1185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * \file 1285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * \brief C++ API: Collation Element Iterator. 1385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 1485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 1585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/** 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* File coleitr.h 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Created by: Helena Shih 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Modification History: 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 8/18/97 helena Added internal API documentation. 
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 08/03/98 erm Synched with 1.2 version CollationElementIterator.java 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 12/10/99 aliu Ported Thai collation support from Java. 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h) 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 02/19/01 swquek Removed CollationElementsIterator() since it is 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* private constructor and no calls are made to it 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 2012-2014 markus Rewritten in C++ again. 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef COLEITR_H 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define COLEITR_H 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/unistr.h" 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h" 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstruct UCollationElements; 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstruct UHashtable; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstruct 
CollationData; 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CollationIterator; 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass RuleBasedCollator; 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass UCollationPCE; 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass UVector32; 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* The CollationElementIterator class is used as an iterator to walk through 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* each character of an international string. Use the iterator to return the 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ordering priority of the positioned character. The ordering priority of a 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* character, which we refer to as a key, defines how a character is collated in 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* the given collation object. 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* For example, consider the following in Slovak and in traditional Spanish collation: 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* <pre> 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* "ca" -> the first key is key('c') and second key is key('a'). 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* "cha" -> the first key is key('ch') and second key is key('a').</pre> 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* And in German phonebook collation, 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* <pre> \htmlonly "æb"-> the first key is key('a'), the second key is key('e'), and 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* the third key is key('b'). 
\endhtmlonly </pre> 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* The key of a character, is an integer composed of primary order(short), 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* secondary order(char), and tertiary order(char). Java strictly defines the 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* size and signedness of its primitive data types. Therefore, the static 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* int32_t to ensure the correctness of the key value. 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* <p>Example of the iterator usage: (without error checking) 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* <pre> 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* \code 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* void CollationElementIterator_Example() 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* { 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* UnicodeString str = "This is a test"; 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* UErrorCode success = U_ZERO_ERROR; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* RuleBasedCollator* rbc = 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (RuleBasedCollator*) RuleBasedCollator::createInstance(success); 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* CollationElementIterator* c = 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* rbc->createCollationElementIterator( str ); 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* int32_t order = c->next(success); 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* c->reset(); 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* order = c->previous(success); 
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* delete c; 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* delete rbc; 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* } 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* \endcode 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* </pre> 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* <p> 9359d709d503bab6e2b61931737e662dd293b40578ccornelius* The method next() returns the collation order of the next character based on 9459d709d503bab6e2b61931737e662dd293b40578ccornelius* the comparison level of the collator. The method previous() returns the 9559d709d503bab6e2b61931737e662dd293b40578ccornelius* collation order of the previous character based on the comparison level of 9659d709d503bab6e2b61931737e662dd293b40578ccornelius* the collator. The Collation Element Iterator moves only in one direction 9759d709d503bab6e2b61931737e662dd293b40578ccornelius* between calls to reset(), setOffset(), or setText(). That is, next() 9859d709d503bab6e2b61931737e662dd293b40578ccornelius* and previous() cannot be interleaved. Whenever previous() is to be called after 9959d709d503bab6e2b61931737e662dd293b40578ccornelius* next() or vice versa, reset(), setOffset() or setText() has to be called first 10059d709d503bab6e2b61931737e662dd293b40578ccornelius* to reset the status, shifting pointers to either the end or the start of 10159d709d503bab6e2b61931737e662dd293b40578ccornelius* the string (reset() or setText()), or the specified position (setOffset()). 10259d709d503bab6e2b61931737e662dd293b40578ccornelius* Hence at the next call of next() or previous(), the first or last collation order, 10359d709d503bab6e2b61931737e662dd293b40578ccornelius* or collation order at the specified position will be returned. If a change of 10459d709d503bab6e2b61931737e662dd293b40578ccornelius* direction is done without one of these calls, the result is undefined. 
10559d709d503bab6e2b61931737e662dd293b40578ccornelius* <p> 10659d709d503bab6e2b61931737e662dd293b40578ccornelius* The result of a forward iterate (next()) and reversed result of the backward 10759d709d503bab6e2b61931737e662dd293b40578ccornelius* iterate (previous()) on the same string are equivalent, if collation orders 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* with the value 0 are ignored. 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Collation orders are computed based on the comparison level of the collator. A collation order 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* consists of primary order, secondary order and tertiary order. The data 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* type of the collation order is <strong>int32_t</strong>. 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Note, CollationElementIterator should not be subclassed. 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* @see Collator 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* @see RuleBasedCollator 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* @version 1.8 Jan 16 2001 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 118f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusclass U_I18N_API CollationElementIterator U_FINAL : public UObject { 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // CollationElementIterator public data member ------------------------------ 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enum { 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * NULLORDER indicates that an error has occurred while 
processing 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULLORDER = (int32_t)0xffffffff 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // CollationElementIterator public constructor/destructor ------------------- 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param other the object to be copied from 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CollationElementIterator(const CollationElementIterator& other); 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual ~CollationElementIterator(); 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // CollationElementIterator public methods ---------------------------------- 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns true if "other" is the same as "this" 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param other the object to be compared 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return true if "other" is the same as "this" 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool operator==(const CollationElementIterator& other) const; 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns true if "other" is not the same as "this". 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param other the object to be compared 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return true if "other" is not the same as "this" 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool operator!=(const CollationElementIterator& other) const; 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Resets the cursor to the beginning of the string. 
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void reset(void); 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Gets the ordering priority of the next character in the string. 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status the error code status. 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the next character's ordering. Otherwise returns NULLORDER if an 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * error has occurred or if the end of the string has been reached 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t next(UErrorCode& status); 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Get the ordering priority of the previous collation element in the string. 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status the error code status. 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the previous element's ordering. 
Otherwise returns NULLORDER if an 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * error has occurred or if the start of the string has been reached 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t previous(UErrorCode& status); 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Gets the primary order of a collation order. 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param order the collation order 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the primary order of a collation order. 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static inline int32_t primaryOrder(int32_t order); 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Gets the secondary order of a collation order. 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param order the collation order 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the secondary order of a collation order. 
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static inline int32_t secondaryOrder(int32_t order); 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Gets the tertiary order of a collation order. 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param order the collation order 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the tertiary order of a collation order. 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static inline int32_t tertiaryOrder(int32_t order); 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the maximum length of any expansion sequences that end with the 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * specified comparison order. 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param order a collation order returned by previous or next. 
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return maximum size of the expansion sequences ending with the collation 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * element or 1 if collation element does not occur at the end of any 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * expansion sequence 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t getMaxExpansion(int32_t order) const; 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Gets the comparison order in the desired strength. Ignore the other 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * differences. 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param order The order value 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t strengthOrder(int32_t order) const; 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the source string. 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param str the source string. 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status the error code status. 
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void setText(const UnicodeString& str, UErrorCode& status); 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the source string. 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param str the source character iterator. 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status the error code status. 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void setText(CharacterIterator& str, UErrorCode& status); 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Checks if a comparison order is ignorable. 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param order the collation order. 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return TRUE if a character is ignorable, FALSE otherwise. 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static inline UBool isIgnorable(int32_t order); 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Gets the offset of the currently processed character in the source string. 
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the offset of the character. 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t getOffset(void) const; 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the offset of the currently processed character in the source string. 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param newOffset the new offset. 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status the error code status. 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @note This function has no return value. 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.0 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void setOffset(int32_t newOffset, UErrorCode& status); 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for the actual class. 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.2 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual UClassID getDynamicClassID() const; 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ICU "poor man's RTTI", returns a UClassID for this class. 
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @stable ICU 2.2 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static UClassID U_EXPORT2 getStaticClassID(); 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef U_HIDE_INTERNAL_API 289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** @internal Reinterprets a UCollationElements pointer as a CollationElementIterator pointer using reinterpret_cast; the pointer value is returned as-is, with no check or conversion. */ 290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) { 291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return reinterpret_cast<CollationElementIterator *>(uc); 292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** @internal Const overload: reinterprets a const UCollationElements pointer as a const CollationElementIterator pointer using reinterpret_cast. */ 294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) { 295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return reinterpret_cast<const CollationElementIterator *>(uc); 296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** @internal Reinterprets this object's address as a UCollationElements pointer using reinterpret_cast (the inverse of fromUCollationElements()). */ 298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline UCollationElements *toUCollationElements() { 299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return reinterpret_cast<UCollationElements *>(this); 300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** @internal Const overload: reinterprets this object's address as a const UCollationElements pointer using reinterpret_cast. */ 302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline const UCollationElements *toUCollationElements() const { 303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return reinterpret_cast<const UCollationElements *>(this); 304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif 
// U_HIDE_INTERNAL_API 306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 30759d709d503bab6e2b61931737e662dd293b40578ccorneliusprivate: 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru friend class RuleBasedCollator; 309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius friend class UCollationPCE; 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * CollationElementIterator constructor. This takes the source string and the 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * collation object. The cursor will walk through the source string based on the 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * predefined collation rules. If the source string is empty, NULLORDER will 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be returned on the calls to next(). 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param sourceText the source string. 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param order the collation object. 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status the error code status. 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CollationElementIterator(const UnicodeString& sourceText, 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const RuleBasedCollator* order, UErrorCode& status); 322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Note: The constructors should take settings & tailoring, not a collator, 323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // to avoid circular dependencies. 
324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // However, for operator==() we would need to be able to compare tailoring data for equality 325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // without making CollationData or CollationTailoring depend on TailoredSet. 326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // (See the implementation of RuleBasedCollator::operator==().) 327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // That might require creating an intermediate class that would be used 328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // by both CollationElementIterator and RuleBasedCollator 329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // but only contain the part of RBC== related to data and rules. 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * CollationElementIterator constructor. This takes the source text (as a CharacterIterator) and the 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * collation object. The cursor will walk through the source text based on the 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * predefined collation rules. If the source text is empty, NULLORDER will 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be returned on the calls to next(). 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param sourceText the source text, supplied as a CharacterIterator. 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param order the collation object. 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status the error code status. 
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CollationElementIterator(const CharacterIterator& sourceText, 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const RuleBasedCollator* order, UErrorCode& status); 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Assignment operator. 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param other the object to be copied 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return a const reference to a CollationElementIterator */ 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const CollationElementIterator& 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru operator=(const CollationElementIterator& other); 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CollationElementIterator(); // default constructor not implemented 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()): returns 0 when dir_ is 1, otherwise returns dir_ unchanged. The member dir_ itself is not modified. */ 354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline int8_t normalizeDir() const { return dir_ == 1 ? 
0 : dir_; } 355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode); 357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order); 359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // CollationElementIterator private data members ---------------------------- 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationIterator *iter_; // owned 363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const RuleBasedCollator *rbc_; // aliased 364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t otherHalf_; 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Iteration direction state. <0: backwards; 0: just after reset() (previous() begins from end); 367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 1: just after setOffset(); >1: forward 368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int8_t dir_; 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Stores offsets from expansions and from unsafe-backwards iteration, 372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * so that getOffset() returns intermediate offsets for the CEs 373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * that are consistent with forward iteration. 
374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector32 *offsets_; 376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString string_; 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius// CollationElementIterator inline method definitions -------------------------- 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* primaryOrder: returns bits 16..31 of order, i.e. (order >> 16) masked to 16 bits. */ 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruinline int32_t CollationElementIterator::primaryOrder(int32_t order) 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (order >> 16) & 0xffff; 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* secondaryOrder: returns bits 8..15 of order, i.e. (order >> 8) masked to 8 bits. */ 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruinline int32_t CollationElementIterator::secondaryOrder(int32_t order) 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (order >> 8) & 0xff; 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* tertiaryOrder: returns the low 8 bits (bits 0..7) of order. */ 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruinline int32_t CollationElementIterator::tertiaryOrder(int32_t order) 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return order & 0xff; 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* isIgnorable: true when the upper 16 bits of order (the bits primaryOrder() extracts) are all zero. */ 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruinline UBool CollationElementIterator::isIgnorable(int32_t order) 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 
399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (order & 0xffff0000) == 0; 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */ 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* COLEITR_H */ 407