1/* 2******************************************************************************* 3* Copyright (c) 1996-2009, International Business Machines Corporation and others. 4* All Rights Reserved. 5******************************************************************************* 6*/ 7 8#ifndef UCOL_H 9#define UCOL_H 10 11#include "unicode/utypes.h" 12 13#if !UCONFIG_NO_COLLATION 14 15#include "unicode/unorm.h" 16#include "unicode/parseerr.h" 17#include "unicode/uloc.h" 18#include "unicode/uset.h" 19 20/** 21 * \file 22 * \brief C API: Collator 23 * 24 * <h2> Collator C API </h2> 25 * 26 * The C API for Collator performs locale-sensitive 27 * string comparison. You use this service to build 28 * searching and sorting routines for natural language text. 29 * <em>Important: </em>The ICU collation service has been reimplemented 30 * in order to achieve better performance and UCA compliance. 31 * For details, see the 32 * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> 33 * collation design document</a>. 34 * <p> 35 * For more information about the collation service see 36 * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>. 37 * <p> 38 * Collation service provides correct sorting orders for most locales supported in ICU. 39 * If specific data for a locale is not available, the orders eventually falls back 40 * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 41 * <p> 42 * Sort ordering may be customized by providing your own set of rules. For more on 43 * this subject see the 44 * <a href="http://icu-project.org/userguide/Collate_Customization.html"> 45 * Collation customization</a> section of the users guide. 46 * <p> 47 * @see UCollationResult 48 * @see UNormalizationMode 49 * @see UCollationStrength 50 * @see UCollationElements 51 */ 52 53/** A collator. 54* For usage in C programs. 
55*/ 56struct UCollator; 57/** structure representing a collator object instance 58 * @stable ICU 2.0 59 */ 60typedef struct UCollator UCollator; 61 62 63/** 64 * UCOL_LESS is returned if source string is compared to be less than target 65 * string in the u_strcoll() method. 66 * UCOL_EQUAL is returned if source string is compared to be equal to target 67 * string in the u_strcoll() method. 68 * UCOL_GREATER is returned if source string is compared to be greater than 69 * target string in the u_strcoll() method. 70 * @see u_strcoll() 71 * <p> 72 * Possible values for a comparison result 73 * @stable ICU 2.0 74 */ 75typedef enum { 76 /** string a == string b */ 77 UCOL_EQUAL = 0, 78 /** string a > string b */ 79 UCOL_GREATER = 1, 80 /** string a < string b */ 81 UCOL_LESS = -1 82} UCollationResult ; 83 84 85/** Enum containing attribute values for controling collation behavior. 86 * Here are all the allowable values. Not every attribute can take every value. The only 87 * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined 88 * value for that locale 89 * @stable ICU 2.0 90 */ 91typedef enum { 92 /** accepted by most attributes */ 93 UCOL_DEFAULT = -1, 94 95 /** Primary collation strength */ 96 UCOL_PRIMARY = 0, 97 /** Secondary collation strength */ 98 UCOL_SECONDARY = 1, 99 /** Tertiary collation strength */ 100 UCOL_TERTIARY = 2, 101 /** Default collation strength */ 102 UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, 103 UCOL_CE_STRENGTH_LIMIT, 104 /** Quaternary collation strength */ 105 UCOL_QUATERNARY=3, 106 /** Identical collation strength */ 107 UCOL_IDENTICAL=15, 108 UCOL_STRENGTH_LIMIT, 109 110 /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 111 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 112 & UCOL_DECOMPOSITION_MODE*/ 113 UCOL_OFF = 16, 114 /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 115 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 116 & UCOL_DECOMPOSITION_MODE*/ 117 UCOL_ON = 17, 118 119 /** Valid 
for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */ 120 UCOL_SHIFTED = 20, 121 /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */ 122 UCOL_NON_IGNORABLE = 21, 123 124 /** Valid for UCOL_CASE_FIRST - 125 lower case sorts before upper case */ 126 UCOL_LOWER_FIRST = 24, 127 /** upper case sorts before lower case */ 128 UCOL_UPPER_FIRST = 25, 129 130 UCOL_ATTRIBUTE_VALUE_COUNT 131 132} UColAttributeValue; 133 134/** 135 * Base letter represents a primary difference. Set comparison 136 * level to UCOL_PRIMARY to ignore secondary and tertiary differences. 137 * Use this to set the strength of a Collator object. 138 * Example of primary difference, "abc" < "abd" 139 * 140 * Diacritical differences on the same base letter represent a secondary 141 * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary 142 * differences. Use this to set the strength of a Collator object. 143 * Example of secondary difference, "ä" >> "a". 144 * 145 * Uppercase and lowercase versions of the same character represents a 146 * tertiary difference. Set comparison level to UCOL_TERTIARY to include 147 * all comparison differences. Use this to set the strength of a Collator 148 * object. 149 * Example of tertiary difference, "abc" <<< "ABC". 150 * 151 * Two characters are considered "identical" when they have the same 152 * unicode spellings. UCOL_IDENTICAL. 153 * For example, "ä" == "ä". 154 * 155 * UCollationStrength is also used to determine the strength of sort keys 156 * generated from UCollator objects 157 * These values can be now found in the UColAttributeValue enum. 158 * @stable ICU 2.0 159 **/ 160typedef UColAttributeValue UCollationStrength; 161 162/** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT 163 * value, as well as the values specific to each one. 164 * @stable ICU 2.0 165 */ 166typedef enum { 167 /** Attribute for direction of secondary weights - used in French. 
168 * Acceptable values are UCOL_ON, which results in secondary weights 169 * being considered backwards and UCOL_OFF which treats secondary 170 * weights in the order they appear.*/ 171 UCOL_FRENCH_COLLATION, 172 /** Attribute for handling variable elements. 173 * Acceptable values are UCOL_NON_IGNORABLE (default) 174 * which treats all the codepoints with non-ignorable 175 * primary weights in the same way, 176 * and UCOL_SHIFTED which causes codepoints with primary 177 * weights that are equal or below the variable top value 178 * to be ignored on primary level and moved to the quaternary 179 * level.*/ 180 UCOL_ALTERNATE_HANDLING, 181 /** Controls the ordering of upper and lower case letters. 182 * Acceptable values are UCOL_OFF (default), which orders 183 * upper and lower case letters in accordance to their tertiary 184 * weights, UCOL_UPPER_FIRST which forces upper case letters to 185 * sort before lower case letters, and UCOL_LOWER_FIRST which does 186 * the opposite. */ 187 UCOL_CASE_FIRST, 188 /** Controls whether an extra case level (positioned before the third 189 * level) is generated or not. Acceptable values are UCOL_OFF (default), 190 * when case level is not generated, and UCOL_ON which causes the case 191 * level to be generated. Contents of the case level are affected by 192 * the value of UCOL_CASE_FIRST attribute. A simple way to ignore 193 * accent differences in a string is to set the strength to UCOL_PRIMARY 194 * and enable case level. */ 195 UCOL_CASE_LEVEL, 196 /** Controls whether the normalization check and necessary normalizations 197 * are performed. When set to UCOL_OFF (default) no normalization check 198 * is performed. The correctness of the result is guaranteed only if the 199 * input data is in so-called FCD form (see users manual for more info). 200 * When set to UCOL_ON, an incremental check is performed to see whether 201 * the input data is in the FCD form. 
If the data is not in the FCD form, 202 * incremental NFD normalization is performed. */ 203 UCOL_NORMALIZATION_MODE, 204 /** An alias for UCOL_NORMALIZATION_MODE attribute */ 205 UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, 206 /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, 207 * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength 208 * for most locales (except Japanese) is tertiary. Quaternary strength 209 * is useful when combined with shifted setting for alternate handling 210 * attribute and for JIS x 4061 collation, when it is used to distinguish 211 * between Katakana and Hiragana (this is achieved by setting the 212 * UCOL_HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level 213 * is affected only by the number of non ignorable code points in 214 * the string. Identical strength is rarely useful, as it amounts 215 * to codepoints of the NFD form of the string. */ 216 UCOL_STRENGTH, 217 /** When turned on, this attribute positions Hiragana before all 218 * non-ignorables on quaternary level This is a sneaky way to produce JIS 219 * sort order */ 220 UCOL_HIRAGANA_QUATERNARY_MODE, 221 /** When turned on, this attribute generates a collation key 222 * for the numeric value of substrings of digits. 223 * This is a way to get '100' to sort AFTER '2'. */ 224 UCOL_NUMERIC_COLLATION, 225 UCOL_ATTRIBUTE_COUNT 226} UColAttribute; 227 228/** Options for retrieving the rule string 229 * @stable ICU 2.0 230 */ 231typedef enum { 232 /** Retrieve tailoring only */ 233 UCOL_TAILORING_ONLY, 234 /** Retrieve UCA rules and tailoring */ 235 UCOL_FULL_RULES 236} UColRuleOption ; 237 238/** 239 * Open a UCollator for comparing strings. 240 * The UCollator pointer is used in all the calls to the Collation 241 * service. After finished, collator must be disposed of by calling 242 * {@link #ucol_close }. 243 * @param loc The locale containing the required collation rules. 
244 * Special values for locales can be passed in - 245 * if NULL is passed for the locale, the default locale 246 * collation rules will be used. If empty string ("") or 247 * "root" are passed, UCA rules will be used. 248 * @param status A pointer to an UErrorCode to receive any errors 249 * @return A pointer to a UCollator, or 0 if an error occurred. 250 * @see ucol_openRules 251 * @see ucol_safeClone 252 * @see ucol_close 253 * @stable ICU 2.0 254 */ 255U_STABLE UCollator* U_EXPORT2 256ucol_open(const char *loc, UErrorCode *status); 257 258/** 259 * Produce an UCollator instance according to the rules supplied. 260 * The rules are used to change the default ordering, defined in the 261 * UCA in a process called tailoring. The resulting UCollator pointer 262 * can be used in the same way as the one obtained by {@link #ucol_strcoll }. 263 * @param rules A string describing the collation rules. For the syntax 264 * of the rules please see users guide. 265 * @param rulesLength The length of rules, or -1 if null-terminated. 266 * @param normalizationMode The normalization mode: One of 267 * UCOL_OFF (expect the text to not need normalization), 268 * UCOL_ON (normalize), or 269 * UCOL_DEFAULT (set the mode according to the rules) 270 * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 271 * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules. 272 * @param parseError A pointer to UParseError to recieve information about errors 273 * occurred during parsing. This argument can currently be set 274 * to NULL, but at users own risk. Please provide a real structure. 275 * @param status A pointer to an UErrorCode to receive any errors 276 * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case 277 * of error - please use status argument to check for errors. 
278 * @see ucol_open 279 * @see ucol_safeClone 280 * @see ucol_close 281 * @stable ICU 2.0 282 */ 283U_STABLE UCollator* U_EXPORT2 284ucol_openRules( const UChar *rules, 285 int32_t rulesLength, 286 UColAttributeValue normalizationMode, 287 UCollationStrength strength, 288 UParseError *parseError, 289 UErrorCode *status); 290 291/** 292 * Open a collator defined by a short form string. 293 * The structure and the syntax of the string is defined in the "Naming collators" 294 * section of the users guide: 295 * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators 296 * Attributes are overriden by the subsequent attributes. So, for "S2_S3", final 297 * strength will be 3. 3066bis locale overrides individual locale parts. 298 * The call to this function is equivalent to a call to ucol_open, followed by a 299 * series of calls to ucol_setAttribute and ucol_setVariableTop. 300 * @param definition A short string containing a locale and a set of attributes. 301 * Attributes not explicitly mentioned are left at the default 302 * state for a locale. 303 * @param parseError if not NULL, structure that will get filled with error's pre 304 * and post context in case of error. 305 * @param forceDefaults if FALSE, the settings that are the same as the collator 306 * default settings will not be applied (for example, setting 307 * French secondary on a French collator would not be executed). 308 * If TRUE, all the settings will be applied regardless of the 309 * collator default value. If the definition 310 * strings are to be cached, should be set to FALSE. 311 * @param status Error code. Apart from regular error conditions connected to 312 * instantiating collators (like out of memory or similar), this 313 * API will return an error if an invalid attribute or attribute/value 314 * combination is specified. 315 * @return A pointer to a UCollator or 0 if an error occured (including an 316 * invalid attribute). 
317 * @see ucol_open 318 * @see ucol_setAttribute 319 * @see ucol_setVariableTop 320 * @see ucol_getShortDefinitionString 321 * @see ucol_normalizeShortDefinitionString 322 * @stable ICU 3.0 323 * 324 */ 325U_STABLE UCollator* U_EXPORT2 326ucol_openFromShortString( const char *definition, 327 UBool forceDefaults, 328 UParseError *parseError, 329 UErrorCode *status); 330 331/** 332 * Get a set containing the contractions defined by the collator. The set includes 333 * both the UCA contractions and the contractions defined by the collator. This set 334 * will contain only strings. If a tailoring explicitly suppresses contractions from 335 * the UCA (like Russian), removed contractions will not be in the resulting set. 336 * @param coll collator 337 * @param conts the set to hold the result. It gets emptied before 338 * contractions are added. 339 * @param status to hold the error code 340 * @return the size of the contraction set 341 * 342 * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead 343 */ 344U_DEPRECATED int32_t U_EXPORT2 345ucol_getContractions( const UCollator *coll, 346 USet *conts, 347 UErrorCode *status); 348 349/** 350 * Get a set containing the expansions defined by the collator. The set includes 351 * both the UCA expansions and the expansions defined by the tailoring 352 * @param coll collator 353 * @param contractions if not NULL, the set to hold the contractions 354 * @param expansions if not NULL, the set to hold the expansions 355 * @param addPrefixes add the prefix contextual elements to contractions 356 * @param status to hold the error code 357 * 358 * @stable ICU 3.4 359 */ 360U_STABLE void U_EXPORT2 361ucol_getContractionsAndExpansions( const UCollator *coll, 362 USet *contractions, USet *expansions, 363 UBool addPrefixes, UErrorCode *status); 364 365/** 366 * Close a UCollator. 367 * Once closed, a UCollator should not be used. Every open collator should 368 * be closed. Otherwise, a memory leak will result. 
369 * @param coll The UCollator to close. 370 * @see ucol_open 371 * @see ucol_openRules 372 * @see ucol_safeClone 373 * @stable ICU 2.0 374 */ 375U_STABLE void U_EXPORT2 376ucol_close(UCollator *coll); 377 378/** 379 * Compare two strings. 380 * The strings will be compared using the options already specified. 381 * @param coll The UCollator containing the comparison rules. 382 * @param source The source string. 383 * @param sourceLength The length of source, or -1 if null-terminated. 384 * @param target The target string. 385 * @param targetLength The length of target, or -1 if null-terminated. 386 * @return The result of comparing the strings; one of UCOL_EQUAL, 387 * UCOL_GREATER, UCOL_LESS 388 * @see ucol_greater 389 * @see ucol_greaterOrEqual 390 * @see ucol_equal 391 * @stable ICU 2.0 392 */ 393U_STABLE UCollationResult U_EXPORT2 394ucol_strcoll( const UCollator *coll, 395 const UChar *source, 396 int32_t sourceLength, 397 const UChar *target, 398 int32_t targetLength); 399 400/** 401 * Determine if one string is greater than another. 402 * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER 403 * @param coll The UCollator containing the comparison rules. 404 * @param source The source string. 405 * @param sourceLength The length of source, or -1 if null-terminated. 406 * @param target The target string. 407 * @param targetLength The length of target, or -1 if null-terminated. 408 * @return TRUE if source is greater than target, FALSE otherwise. 409 * @see ucol_strcoll 410 * @see ucol_greaterOrEqual 411 * @see ucol_equal 412 * @stable ICU 2.0 413 */ 414U_STABLE UBool U_EXPORT2 415ucol_greater(const UCollator *coll, 416 const UChar *source, int32_t sourceLength, 417 const UChar *target, int32_t targetLength); 418 419/** 420 * Determine if one string is greater than or equal to another. 421 * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS 422 * @param coll The UCollator containing the comparison rules. 
423 * @param source The source string. 424 * @param sourceLength The length of source, or -1 if null-terminated. 425 * @param target The target string. 426 * @param targetLength The length of target, or -1 if null-terminated. 427 * @return TRUE if source is greater than or equal to target, FALSE otherwise. 428 * @see ucol_strcoll 429 * @see ucol_greater 430 * @see ucol_equal 431 * @stable ICU 2.0 432 */ 433U_STABLE UBool U_EXPORT2 434ucol_greaterOrEqual(const UCollator *coll, 435 const UChar *source, int32_t sourceLength, 436 const UChar *target, int32_t targetLength); 437 438/** 439 * Compare two strings for equality. 440 * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL 441 * @param coll The UCollator containing the comparison rules. 442 * @param source The source string. 443 * @param sourceLength The length of source, or -1 if null-terminated. 444 * @param target The target string. 445 * @param targetLength The length of target, or -1 if null-terminated. 446 * @return TRUE if source is equal to target, FALSE otherwise 447 * @see ucol_strcoll 448 * @see ucol_greater 449 * @see ucol_greaterOrEqual 450 * @stable ICU 2.0 451 */ 452U_STABLE UBool U_EXPORT2 453ucol_equal(const UCollator *coll, 454 const UChar *source, int32_t sourceLength, 455 const UChar *target, int32_t targetLength); 456 457/** 458 * Compare two UTF-8 encoded trings. 459 * The strings will be compared using the options already specified. 460 * @param coll The UCollator containing the comparison rules. 461 * @param sIter The source string iterator. 462 * @param tIter The target string iterator. 
463 * @return The result of comparing the strings; one of UCOL_EQUAL, 464 * UCOL_GREATER, UCOL_LESS 465 * @param status A pointer to an UErrorCode to receive any errors 466 * @see ucol_strcoll 467 * @stable ICU 2.6 468 */ 469U_STABLE UCollationResult U_EXPORT2 470ucol_strcollIter( const UCollator *coll, 471 UCharIterator *sIter, 472 UCharIterator *tIter, 473 UErrorCode *status); 474 475/** 476 * Get the collation strength used in a UCollator. 477 * The strength influences how strings are compared. 478 * @param coll The UCollator to query. 479 * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 480 * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL 481 * @see ucol_setStrength 482 * @stable ICU 2.0 483 */ 484U_STABLE UCollationStrength U_EXPORT2 485ucol_getStrength(const UCollator *coll); 486 487/** 488 * Set the collation strength used in a UCollator. 489 * The strength influences how strings are compared. 490 * @param coll The UCollator to set. 491 * @param strength The desired collation strength; one of UCOL_PRIMARY, 492 * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT 493 * @see ucol_getStrength 494 * @stable ICU 2.0 495 */ 496U_STABLE void U_EXPORT2 497ucol_setStrength(UCollator *coll, 498 UCollationStrength strength); 499 500/** 501 * Get the display name for a UCollator. 502 * The display name is suitable for presentation to a user. 503 * @param objLoc The locale of the collator in question. 504 * @param dispLoc The locale for display. 505 * @param result A pointer to a buffer to receive the attribute. 506 * @param resultLength The maximum size of result. 507 * @param status A pointer to an UErrorCode to receive any errors 508 * @return The total buffer size needed; if greater than resultLength, 509 * the output was truncated. 
510 * @stable ICU 2.0 511 */ 512U_STABLE int32_t U_EXPORT2 513ucol_getDisplayName( const char *objLoc, 514 const char *dispLoc, 515 UChar *result, 516 int32_t resultLength, 517 UErrorCode *status); 518 519/** 520 * Get a locale for which collation rules are available. 521 * A UCollator in a locale returned by this function will perform the correct 522 * collation for the locale. 523 * @param localeIndex The index of the desired locale. 524 * @return A locale for which collation rules are available, or 0 if none. 525 * @see ucol_countAvailable 526 * @stable ICU 2.0 527 */ 528U_STABLE const char* U_EXPORT2 529ucol_getAvailable(int32_t localeIndex); 530 531/** 532 * Determine how many locales have collation rules available. 533 * This function is most useful as determining the loop ending condition for 534 * calls to {@link #ucol_getAvailable }. 535 * @return The number of locales for which collation rules are available. 536 * @see ucol_getAvailable 537 * @stable ICU 2.0 538 */ 539U_STABLE int32_t U_EXPORT2 540ucol_countAvailable(void); 541 542#if !UCONFIG_NO_SERVICE 543/** 544 * Create a string enumerator of all locales for which a valid 545 * collator may be opened. 546 * @param status input-output error code 547 * @return a string enumeration over locale strings. The caller is 548 * responsible for closing the result. 549 * @stable ICU 3.0 550 */ 551U_STABLE UEnumeration* U_EXPORT2 552ucol_openAvailableLocales(UErrorCode *status); 553#endif 554 555/** 556 * Create a string enumerator of all possible keywords that are relevant to 557 * collation. At this point, the only recognized keyword for this 558 * service is "collation". 559 * @param status input-output error code 560 * @return a string enumeration over locale strings. The caller is 561 * responsible for closing the result. 
562 * @stable ICU 3.0 563 */ 564U_STABLE UEnumeration* U_EXPORT2 565ucol_getKeywords(UErrorCode *status); 566 567/** 568 * Given a keyword, create a string enumeration of all values 569 * for that keyword that are currently in use. 570 * @param keyword a particular keyword as enumerated by 571 * ucol_getKeywords. If any other keyword is passed in, *status is set 572 * to U_ILLEGAL_ARGUMENT_ERROR. 573 * @param status input-output error code 574 * @return a string enumeration over collation keyword values, or NULL 575 * upon error. The caller is responsible for closing the result. 576 * @stable ICU 3.0 577 */ 578U_STABLE UEnumeration* U_EXPORT2 579ucol_getKeywordValues(const char *keyword, UErrorCode *status); 580 581/** 582 * Given a key and a locale, returns an array of string values in a preferred 583 * order that would make a difference. These are all and only those values where 584 * the open (creation) of the service with the locale formed from the input locale 585 * plus input keyword and that value has different behavior than creation with the 586 * input locale alone. 587 * @param key one of the keys supported by this service. For now, only 588 * "collation" is supported. 589 * @param locale the locale 590 * @param commonlyUsed if set to true it will return only commonly used values 591 * with the given locale in preferred order. Otherwise, 592 * it will return all the available values for the locale. 593 * @param status error status 594 * @return a string enumeration over keyword values for the given key and the locale. 595 * @draft ICU 4.2 596 */ 597U_DRAFT UEnumeration* U_EXPORT2 598ucol_getKeywordValuesForLocale(const char* key, 599 const char* locale, 600 UBool commonlyUsed, 601 UErrorCode* status); 602 603/** 604 * Return the functionally equivalent locale for the given 605 * requested locale, with respect to given keyword, for the 606 * collation service. 
If two locales return the same result, then 607 * collators instantiated for these locales will behave 608 * equivalently. The converse is not always true; two collators 609 * may in fact be equivalent, but return different results, due to 610 * internal details. The return result has no other meaning than 611 * that stated above, and implies nothing as to the relationship 612 * between the two locales. This is intended for use by 613 * applications who wish to cache collators, or otherwise reuse 614 * collators when possible. The functional equivalent may change 615 * over time. For more information, please see the <a 616 * href="http://icu-project.org/userguide/locale.html#services"> 617 * Locales and Services</a> section of the ICU User Guide. 618 * @param result fillin for the functionally equivalent locale 619 * @param resultCapacity capacity of the fillin buffer 620 * @param keyword a particular keyword as enumerated by 621 * ucol_getKeywords. 622 * @param locale the requested locale 623 * @param isAvailable if non-NULL, pointer to a fillin parameter that 624 * indicates whether the requested locale was 'available' to the 625 * collation service. A locale is defined as 'available' if it 626 * physically exists within the collation locale data. 627 * @param status pointer to input-output error code 628 * @return the actual buffer size needed for the locale. If greater 629 * than resultCapacity, the returned full name will be truncated and 630 * an error code will be returned. 631 * @stable ICU 3.0 632 */ 633U_STABLE int32_t U_EXPORT2 634ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 635 const char* keyword, const char* locale, 636 UBool* isAvailable, UErrorCode* status); 637 638/** 639 * Get the collation rules from a UCollator. 640 * The rules will follow the rule syntax. 641 * @param coll The UCollator to query. 642 * @param length 643 * @return The collation rules. 
644 * @stable ICU 2.0 645 */ 646U_STABLE const UChar* U_EXPORT2 647ucol_getRules( const UCollator *coll, 648 int32_t *length); 649 650/** Get the short definition string for a collator. This API harvests the collator's 651 * locale and the attribute set and produces a string that can be used for opening 652 * a collator with the same properties using the ucol_openFromShortString API. 653 * This string will be normalized. 654 * The structure and the syntax of the string is defined in the "Naming collators" 655 * section of the users guide: 656 * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators 657 * This API supports preflighting. 658 * @param coll a collator 659 * @param locale a locale that will appear as a collators locale in the resulting 660 * short string definition. If NULL, the locale will be harvested 661 * from the collator. 662 * @param buffer space to hold the resulting string 663 * @param capacity capacity of the buffer 664 * @param status for returning errors. All the preflighting errors are featured 665 * @return length of the resulting string 666 * @see ucol_openFromShortString 667 * @see ucol_normalizeShortDefinitionString 668 * @stable ICU 3.0 669 */ 670U_STABLE int32_t U_EXPORT2 671ucol_getShortDefinitionString(const UCollator *coll, 672 const char *locale, 673 char *buffer, 674 int32_t capacity, 675 UErrorCode *status); 676 677/** Verifies and normalizes short definition string. 678 * Normalized short definition string has all the option sorted by the argument name, 679 * so that equivalent definition strings are the same. 680 * This API supports preflighting. 681 * @param source definition string 682 * @param destination space to hold the resulting string 683 * @param capacity capacity of the buffer 684 * @param parseError if not NULL, structure that will get filled with error's pre 685 * and post context in case of error. 686 * @param status Error code. 
This API will return an error if an invalid attribute 687 * or attribute/value combination is specified. All the preflighting 688 * errors are also featured 689 * @return length of the resulting normalized string. 690 * 691 * @see ucol_openFromShortString 692 * @see ucol_getShortDefinitionString 693 * 694 * @stable ICU 3.0 695 */ 696 697U_STABLE int32_t U_EXPORT2 698ucol_normalizeShortDefinitionString(const char *source, 699 char *destination, 700 int32_t capacity, 701 UParseError *parseError, 702 UErrorCode *status); 703 704 705/** 706 * Get a sort key for a string from a UCollator. 707 * Sort keys may be compared using <TT>strcmp</TT>. 708 * @param coll The UCollator containing the collation rules. 709 * @param source The string to transform. 710 * @param sourceLength The length of source, or -1 if null-terminated. 711 * @param result A pointer to a buffer to receive the attribute. 712 * @param resultLength The maximum size of result. 713 * @return The size needed to fully store the sort key. 714 * If there was an internal error generating the sort key, 715 * a zero value is returned. 716 * @see ucol_keyHashCode 717 * @stable ICU 2.0 718 */ 719U_STABLE int32_t U_EXPORT2 720ucol_getSortKey(const UCollator *coll, 721 const UChar *source, 722 int32_t sourceLength, 723 uint8_t *result, 724 int32_t resultLength); 725 726 727/** Gets the next count bytes of a sort key. Caller needs 728 * to preserve state array between calls and to provide 729 * the same type of UCharIterator set with the same string. 730 * The destination buffer provided must be big enough to store 731 * the number of requested bytes. Generated sortkey is not 732 * compatible with sortkeys generated using ucol_getSortKey 733 * API, since we don't do any compression. If uncompressed 734 * sortkeys are required, this API can be used. 735 * @param coll The UCollator containing the collation rules. 736 * @param iter UCharIterator containing the string we need 737 * the sort key to be calculated for. 
738 * @param state Opaque state of sortkey iteration. 739 * @param dest Buffer to hold the resulting sortkey part 740 * @param count number of sort key bytes required. 741 * @param status error code indicator. 742 * @return the actual number of bytes of a sortkey. It can be 743 * smaller than count if we have reached the end of 744 * the sort key. 745 * @stable ICU 2.6 746 */ 747U_STABLE int32_t U_EXPORT2 748ucol_nextSortKeyPart(const UCollator *coll, 749 UCharIterator *iter, 750 uint32_t state[2], 751 uint8_t *dest, int32_t count, 752 UErrorCode *status); 753 754/** enum that is taken by ucol_getBound API 755 * See below for explanation 756 * do not change the values assigned to the 757 * members of this enum. Underlying code 758 * depends on them having these numbers 759 * @stable ICU 2.0 760 */ 761typedef enum { 762 /** lower bound */ 763 UCOL_BOUND_LOWER = 0, 764 /** upper bound that will match strings of exact size */ 765 UCOL_BOUND_UPPER = 1, 766 /** upper bound that will match all the strings that have the same initial substring as the given string */ 767 UCOL_BOUND_UPPER_LONG = 2, 768 /** count of the bound modes above (implicitly 3); must stay the last enumerator */ UCOL_BOUND_VALUE_COUNT 769} UColBoundMode; 770 771/** 772 * Produce a bound for a given sortkey and a number of levels. 773 * Return value is always the number of bytes needed, regardless of 774 * whether the result buffer was big enough or even valid.<br> 775 * Resulting bounds can be used to produce a range of strings that are 776 * between upper and lower bounds. For example, if bounds are produced 777 * for a sortkey of string "smith", strings between upper and lower 778 * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> 779 * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER 780 * is produced, strings matched would be as above.
However, if bound 781 * produced using UCOL_BOUND_UPPER_LONG is used, the above example will 782 * also match "Smithsonian" and similar.<br> 783 * For more on usage, see example in cintltst/capitst.c in procedure 784 * TestBounds. 785 * Sort keys may be compared using <TT>strcmp</TT>. 786 * @param source The source sortkey. 787 * @param sourceLength The length of source, or -1 if null-terminated. 788 * (If an unmodified sortkey is passed, it is always null 789 * terminated). 790 * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 791 * produces a lower inclusive bound, UCOL_BOUND_UPPER, that 792 * produces upper bound that matches strings of the same length 793 * or UCOL_BOUND_UPPER_LONG that matches strings that have the 794 * same starting substring as the source string. 795 * @param noOfLevels Number of levels required in the resulting bound (for most 796 * uses, the recommended value is 1). See users guide for 797 * explanation on number of levels a sortkey can have. 798 * @param result A pointer to a buffer to receive the resulting sortkey. 799 * @param resultLength The maximum size of result. 800 * @param status Used for returning error code if something went wrong. If the 801 * number of levels requested is higher than the number of levels 802 * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 803 * issued. 804 * @return The size needed to fully store the bound. 805 * @see ucol_keyHashCode 806 * @stable ICU 2.1 807 */ 808U_STABLE int32_t U_EXPORT2 809ucol_getBound(const uint8_t *source, 810 int32_t sourceLength, 811 UColBoundMode boundType, 812 uint32_t noOfLevels, 813 uint8_t *result, 814 int32_t resultLength, 815 UErrorCode *status); 816 817/** 818 * Gets the version information for a Collator. Version is currently 819 * an opaque 32-bit number which depends, among other things, on major 820 * versions of the collator tailoring and UCA. 821 * @param coll The UCollator to query.
822 * @param info the version # information, the result will be filled in 823 * @stable ICU 2.0 824 */ 825U_STABLE void U_EXPORT2 826ucol_getVersion(const UCollator* coll, UVersionInfo info); 827 828/** 829 * Gets the UCA version information for a Collator. Version is the 830 * UCA version number (3.1.1, 4.0). 831 * @param coll The UCollator to query. 832 * @param info the version # information, the result will be filled in 833 * @stable ICU 2.8 834 */ 835U_STABLE void U_EXPORT2 836ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); 837 838/** 839 * Merge two sort keys. The levels are merged with their corresponding counterparts 840 * (primaries with primaries, secondaries with secondaries etc.). Between the values 841 * from the same level a separator is inserted. 842 * example (uncompressed): 843 * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00 844 * will be merged as 845 * 191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00 846 * This allows for concatenating of first and last names for sorting, among other things. 847 * If the destination buffer is not big enough, the results are undefined. 848 * If any of source lengths are zero or any of source pointers are NULL/undefined, 849 * result is of size zero. 850 * @param src1 pointer to the first sortkey 851 * @param src1Length length of the first sortkey 852 * @param src2 pointer to the second sortkey 853 * @param src2Length length of the second sortkey 854 * @param dest buffer to hold the result 855 * @param destCapacity size of the buffer for the result 856 * @return size of the result.
If the buffer is big enough size is always 857 * src1Length+src2Length-1 (NOTE(review): the example above yields 24 bytes from two 12-byte keys, i.e. src1Length+src2Length, assuming lengths include the trailing 00 -- confirm against the implementation) 858 * @stable ICU 2.0 859 */ 860U_STABLE int32_t U_EXPORT2 861ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, 862 const uint8_t *src2, int32_t src2Length, 863 uint8_t *dest, int32_t destCapacity); 864 865/** 866 * Universal attribute setter 867 * @param coll collator whose attributes are to be changed 868 * @param attr attribute type 869 * @param value attribute value 870 * @param status to indicate whether the operation went on smoothly or there were errors 871 * @see UColAttribute 872 * @see UColAttributeValue 873 * @see ucol_getAttribute 874 * @stable ICU 2.0 875 */ 876U_STABLE void U_EXPORT2 877ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); 878 879/** 880 * Universal attribute getter 881 * @param coll collator whose attributes are to be queried 882 * @param attr attribute type 883 * @param status to indicate whether the operation went on smoothly or there were errors 884 * @return attribute value 885 * @see UColAttribute 886 * @see UColAttributeValue 887 * @see ucol_setAttribute 888 * @stable ICU 2.0 889 */ 890U_STABLE UColAttributeValue U_EXPORT2 891ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status); 892 893/** Variable top 894 * is a two byte primary value which causes all the codepoints with primary values that 895 * are less than or equal to the variable top to be shifted when alternate handling is set 896 * to UCOL_SHIFTED. 897 * Sets the variable top to a collation element value of a string supplied. 898 * @param coll collator whose variable top needs to be changed 899 * @param varTop one or more (if contraction) UChars to which the variable top should be set 900 * @param len length of variable top string. If -1 it is considered to be zero terminated. 901 * @param status error code. If error code is set, the return value is undefined.
902 * Errors set by this function are: <br> 903 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such 904 * contraction<br> 905 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes 906 * @return a 32 bit value containing the value of the variable top in upper 16 bits. 907 * Lower 16 bits are undefined 908 * @see ucol_getVariableTop 909 * @see ucol_restoreVariableTop 910 * @stable ICU 2.0 911 */ 912U_STABLE uint32_t U_EXPORT2 913ucol_setVariableTop(UCollator *coll, 914 const UChar *varTop, int32_t len, 915 UErrorCode *status); 916 917/** 918 * Gets the variable top value of a Collator. 919 * Lower 16 bits are undefined and should be ignored. 920 * @param coll collator whose variable top needs to be retrieved 921 * @param status error code (not changed by function). If error code is set, 922 * the return value is undefined. 923 * @return the variable top value of a Collator. 924 * @see ucol_setVariableTop 925 * @see ucol_restoreVariableTop 926 * @stable ICU 2.0 927 */ 928U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status); 929 930/** 931 * Sets the variable top to a collation element value supplied. Variable top is 932 * set to the upper 16 bits. 933 * Lower 16 bits are ignored. 934 * @param coll collator whose variable top needs to be changed 935 * @param varTop CE value, as returned by ucol_setVariableTop or ucol_getVariableTop 936 * @param status error code (not changed by function) 937 * @see ucol_getVariableTop 938 * @see ucol_setVariableTop 939 * @stable ICU 2.0 940 */ 941U_STABLE void U_EXPORT2 942ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status); 943 944/** 945 * Thread safe cloning operation. The result is a clone of a given collator. 946 * @param coll collator to be cloned 947 * @param stackBuffer user allocated space for the new clone. 948 * If NULL new memory will be allocated.
949 * If buffer is not large enough, new memory will be allocated. 950 * Clients can use the U_COL_SAFECLONE_BUFFERSIZE. 951 * This will probably be enough to avoid memory allocations. 952 * @param pBufferSize pointer to size of allocated space. 953 * If *pBufferSize == 0, a sufficient size for use in cloning will 954 * be returned ('pre-flighting') 955 * If *pBufferSize is not enough for a stack-based safe clone, 956 * new memory will be allocated. 957 * @param status to indicate whether the operation went on smoothly or there were errors 958 * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any 959 * allocations were necessary. 960 * @return pointer to the new clone 961 * @see ucol_open 962 * @see ucol_openRules 963 * @see ucol_close 964 * @stable ICU 2.0 965 */ 966U_STABLE UCollator* U_EXPORT2 967ucol_safeClone(const UCollator *coll, 968 void *stackBuffer, 969 int32_t *pBufferSize, 970 UErrorCode *status); 971 972/** default memory size for the new clone. It needs to be this large for os/400 large pointers 973 * @stable ICU 2.0 974 */ 975#define U_COL_SAFECLONE_BUFFERSIZE 512 976 977/** 978 * Returns current rules. Delta defines whether full rules are returned or just the tailoring. 979 * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough 980 * to store rules, will store up to available space. 981 * @param coll collator to get the rules from 982 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 983 * @param buffer buffer to store the result in. If NULL, you'll get no rules. 984 * @param bufferLen length of buffer to store rules in. If less than needed, you'll get only the part that fits in. 985 * @return number of UChars needed to store the rules 986 * @stable ICU 2.0 987 */ 988U_STABLE int32_t U_EXPORT2 989ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen); 990 991/** 992 * gets the locale name of the collator.
If the collator 993 * is instantiated from the rules, then this function returns 994 * NULL. 995 * @param coll The UCollator for which the locale is needed 996 * @param type You can choose between requested, valid and actual 997 * locale. For description see the definition of 998 * ULocDataLocaleType in uloc.h 999 * @param status error code of the operation 1000 * @return real locale name from which the collation data comes. 1001 * If the collator was instantiated from rules, returns 1002 * NULL. 1003 * @deprecated ICU 2.8 Use ucol_getLocaleByType instead 1004 */ 1005U_DEPRECATED const char * U_EXPORT2 1006ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1007 1008 1009/** 1010 * Gets the locale name of the collator. If the collator 1011 * is instantiated from the rules, then this function returns 1012 * NULL. 1013 * @param coll The UCollator for which the locale is needed 1014 * @param type You can choose between requested, valid and actual 1015 * locale. For description see the definition of 1016 * ULocDataLocaleType in uloc.h 1017 * @param status error code of the operation 1018 * @return real locale name from which the collation data comes. 1019 * If the collator was instantiated from rules, returns 1020 * NULL. 1021 * @stable ICU 2.8 1022 */ 1023U_STABLE const char * U_EXPORT2 1024ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1025 1026/** 1027 * Get a Unicode set that contains all the characters and sequences tailored in 1028 * this collator. The result must be disposed of by using uset_close. 1029 * @param coll The UCollator for which we want to get tailored chars 1030 * @param status error code of the operation 1031 * @return a pointer to a newly created USet.
Must be disposed by using uset_close 1032 * @see ucol_openRules 1033 * @see uset_close 1034 * @stable ICU 2.4 1035 */ 1036U_STABLE USet * U_EXPORT2 1037ucol_getTailoredSet(const UCollator *coll, UErrorCode *status); 1038 1039/** 1040 * Universal attribute getter that returns UCOL_DEFAULT if the value is default 1041 * @param coll collator whose attributes are to be queried 1042 * @param attr attribute type 1043 * @param status to indicate whether the operation went on smoothly or there were errors 1044 * @return attribute value or UCOL_DEFAULT if the value is default 1045 * @see UColAttribute 1046 * @see UColAttributeValue 1047 * @see ucol_setAttribute 1048 * @internal ICU 3.0 1049 */ 1050U_INTERNAL UColAttributeValue U_EXPORT2 1051ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status); 1052 1053/** Check whether two collators are equal. Collators are considered equal if they 1054 * will sort strings the same. This means that both the current attributes and the 1055 * rules must be equivalent. Currently used for RuleBasedCollator::operator==. 1056 * @param source first collator 1057 * @param target second collator 1058 * @return TRUE if the collators are equal, FALSE otherwise 1059 * @internal ICU 3.0 1060 */ 1061U_INTERNAL UBool U_EXPORT2 1062ucol_equals(const UCollator *source, const UCollator *target); 1063 1064/** Calculates the set of unsafe code points, given a collator. 1065 * A character is unsafe if you could append any character and cause the ordering to alter significantly. 1066 * Collation sorts in normalized order, so anything that rearranges in normalization can cause this. 1067 * Thus if you have a character like a_umlaut, and you add a lower_dot to it, 1068 * then it normalizes to a_lower_dot + umlaut, and sorts differently.
1069 * @param coll Collator 1070 * @param unsafe a fill-in set to receive the unsafe points 1071 * @param status for catching errors 1072 * @return number of elements in the set 1073 * @internal ICU 3.0 1074 */ 1075U_INTERNAL int32_t U_EXPORT2 1076ucol_getUnsafeSet( const UCollator *coll, 1077 USet *unsafe, 1078 UErrorCode *status); 1079 1080/** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away. 1081 * @internal ICU 3.2.1 1082 */ 1083U_INTERNAL void U_EXPORT2 1084ucol_forgetUCA(void); 1085 1086/** Touches all resources needed for instantiating a collator from a short string definition, 1087 * thus filling up the cache. 1088 * @param definition A short string containing a locale and a set of attributes. 1089 * Attributes not explicitly mentioned are left at the default 1090 * state for a locale. 1091 * @param forceDefaults if FALSE, the settings that are the same as the collator 1092 * default settings will not be applied (for example, setting 1093 * French secondary on a French collator would not be executed). 1094 * If TRUE, all the settings will be applied regardless of the 1095 * collator default value. If the definition 1096 * strings are to be cached, should be set to FALSE. 1097 * @param parseError if not NULL, structure that will get filled with error's pre 1098 * and post context in case of error. 1099 * @param status Error code. Apart from regular error conditions connected to 1100 * instantiating collators (like out of memory or similar), this 1101 * API will return an error if an invalid attribute or attribute/value 1102 * combination is specified. 1103 * @see ucol_openFromShortString 1104 * @internal ICU 3.2.1 1105 */ 1106U_INTERNAL void U_EXPORT2 1107ucol_prepareShortStringOpen( const char *definition, 1108 UBool forceDefaults, 1109 UParseError *parseError, 1110 UErrorCode *status); 1111 1112/** Creates a binary image of a collator.
This binary image can be stored and 1113 * later used to instantiate a collator using ucol_openBinary. 1114 * This API supports preflighting. 1115 * @param coll Collator 1116 * @param buffer a fill-in buffer to receive the binary image 1117 * @param capacity capacity of the destination buffer 1118 * @param status for catching errors 1119 * @return size of the image 1120 * @see ucol_openBinary 1121 * @stable ICU 3.2 1122 */ 1123U_STABLE int32_t U_EXPORT2 1124ucol_cloneBinary(const UCollator *coll, 1125 uint8_t *buffer, int32_t capacity, 1126 UErrorCode *status); 1127 1128/** Opens a collator from a collator binary image created using 1129 * ucol_cloneBinary. Binary image used in instantiation of the 1130 * collator remains owned by the user and should stay around for 1131 * the lifetime of the collator. The API also takes a base collator 1132 * which usually should be UCA. 1133 * @param bin binary image owned by the user and required through the 1134 * lifetime of the collator 1135 * @param length size of the image. If negative, the API will try to 1136 * figure out the length of the image 1137 * @param base fallback collator, usually UCA. Base is required to be 1138 * present through the lifetime of the collator. Currently 1139 * it cannot be NULL. 1140 * @param status for catching errors 1141 * @return newly created collator 1142 * @see ucol_cloneBinary 1143 * @stable ICU 3.2 1144 */ 1145U_STABLE UCollator* U_EXPORT2 1146ucol_openBinary(const uint8_t *bin, int32_t length, 1147 const UCollator *base, 1148 UErrorCode *status); 1149 1150 1151#endif /* #if !UCONFIG_NO_COLLATION */ 1152 1153#endif 1154 1155