/* ucol.h revision c73f511526464f8e56c242df80552e9b0d94ae3d */
1/* 2******************************************************************************* 3* Copyright (c) 1996-2014, International Business Machines Corporation and others. 4* All Rights Reserved. 5******************************************************************************* 6*/ 7 8#ifndef UCOL_H 9#define UCOL_H 10 11#include "unicode/utypes.h" 12 13#if !UCONFIG_NO_COLLATION 14 15#include "unicode/unorm.h" 16#include "unicode/localpointer.h" 17#include "unicode/parseerr.h" 18#include "unicode/uloc.h" 19#include "unicode/uset.h" 20#include "unicode/uscript.h" 21 22/** 23 * \file 24 * \brief C API: Collator 25 * 26 * <h2> Collator C API </h2> 27 * 28 * The C API for Collator performs locale-sensitive 29 * string comparison. You use this service to build 30 * searching and sorting routines for natural language text. 31 * <p> 32 * For more information about the collation service see 33 * <a href="http://userguide.icu-project.org/collation">the User Guide</a>. 34 * <p> 35 * Collation service provides correct sorting orders for most locales supported in ICU. 36 * If specific data for a locale is not available, the orders eventually falls back 37 * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. 38 * <p> 39 * Sort ordering may be customized by providing your own set of rules. For more on 40 * this subject see the <a href="http://userguide.icu-project.org/collation/customization"> 41 * Collation Customization</a> section of the User Guide. 42 * <p> 43 * @see UCollationResult 44 * @see UNormalizationMode 45 * @see UCollationStrength 46 * @see UCollationElements 47 */ 48 49/** A collator. 50* For usage in C programs. 51*/ 52struct UCollator; 53/** structure representing a collator object instance 54 * @stable ICU 2.0 55 */ 56typedef struct UCollator UCollator; 57 58 59/** 60 * UCOL_LESS is returned if source string is compared to be less than target 61 * string in the ucol_strcoll() method. 
62 * UCOL_EQUAL is returned if source string is compared to be equal to target 63 * string in the ucol_strcoll() method. 64 * UCOL_GREATER is returned if source string is compared to be greater than 65 * target string in the ucol_strcoll() method. 66 * @see ucol_strcoll() 67 * <p> 68 * Possible values for a comparison result 69 * @stable ICU 2.0 70 */ 71typedef enum { 72 /** string a == string b */ 73 UCOL_EQUAL = 0, 74 /** string a > string b */ 75 UCOL_GREATER = 1, 76 /** string a < string b */ 77 UCOL_LESS = -1 78} UCollationResult ; 79 80 81/** Enum containing attribute values for controling collation behavior. 82 * Here are all the allowable values. Not every attribute can take every value. The only 83 * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined 84 * value for that locale 85 * @stable ICU 2.0 86 */ 87typedef enum { 88 /** accepted by most attributes */ 89 UCOL_DEFAULT = -1, 90 91 /** Primary collation strength */ 92 UCOL_PRIMARY = 0, 93 /** Secondary collation strength */ 94 UCOL_SECONDARY = 1, 95 /** Tertiary collation strength */ 96 UCOL_TERTIARY = 2, 97 /** Default collation strength */ 98 UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, 99 UCOL_CE_STRENGTH_LIMIT, 100 /** Quaternary collation strength */ 101 UCOL_QUATERNARY=3, 102 /** Identical collation strength */ 103 UCOL_IDENTICAL=15, 104 UCOL_STRENGTH_LIMIT, 105 106 /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 107 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 108 & UCOL_DECOMPOSITION_MODE*/ 109 UCOL_OFF = 16, 110 /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 111 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 112 & UCOL_DECOMPOSITION_MODE*/ 113 UCOL_ON = 17, 114 115 /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */ 116 UCOL_SHIFTED = 20, 117 /** Valid for UCOL_ALTERNATE_HANDLING. 
Alternate handling will be non ignorable */ 118 UCOL_NON_IGNORABLE = 21, 119 120 /** Valid for UCOL_CASE_FIRST - 121 lower case sorts before upper case */ 122 UCOL_LOWER_FIRST = 24, 123 /** upper case sorts before lower case */ 124 UCOL_UPPER_FIRST = 25, 125 126 UCOL_ATTRIBUTE_VALUE_COUNT 127 128} UColAttributeValue; 129 130/** 131 * Enum containing the codes for reordering segments of the collation table that are not script 132 * codes. These reordering codes are to be used in conjunction with the script codes. 133 * @see ucol_getReorderCodes 134 * @see ucol_setReorderCodes 135 * @see ucol_getEquivalentReorderCodes 136 * @see UScriptCode 137 * @stable ICU 4.8 138 */ 139 typedef enum { 140 /** 141 * A special reordering code that is used to specify the default 142 * reordering codes for a locale. 143 * @stable ICU 4.8 144 */ 145 UCOL_REORDER_CODE_DEFAULT = -1, 146 /** 147 * A special reordering code that is used to specify no reordering codes. 148 * @stable ICU 4.8 149 */ 150 UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN, 151 /** 152 * A special reordering code that is used to specify all other codes used for 153 * reordering except for the codes lised as UColReorderCode values and those 154 * listed explicitly in a reordering. 155 * @stable ICU 4.8 156 */ 157 UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN, 158 /** 159 * Characters with the space property. 160 * This is equivalent to the rule value "space". 161 * @stable ICU 4.8 162 */ 163 UCOL_REORDER_CODE_SPACE = 0x1000, 164 /** 165 * The first entry in the enumeration of reordering groups. This is intended for use in 166 * range checking and enumeration of the reorder codes. 167 * @stable ICU 4.8 168 */ 169 UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE, 170 /** 171 * Characters with the punctuation property. 172 * This is equivalent to the rule value "punct". 173 * @stable ICU 4.8 174 */ 175 UCOL_REORDER_CODE_PUNCTUATION = 0x1001, 176 /** 177 * Characters with the symbol property. 
178 * This is equivalent to the rule value "symbol". 179 * @stable ICU 4.8 180 */ 181 UCOL_REORDER_CODE_SYMBOL = 0x1002, 182 /** 183 * Characters with the currency property. 184 * This is equivalent to the rule value "currency". 185 * @stable ICU 4.8 186 */ 187 UCOL_REORDER_CODE_CURRENCY = 0x1003, 188 /** 189 * Characters with the digit property. 190 * This is equivalent to the rule value "digit". 191 * @stable ICU 4.8 192 */ 193 UCOL_REORDER_CODE_DIGIT = 0x1004, 194 /** 195 * The limit of the reorder codes. This is intended for use in range checking 196 * and enumeration of the reorder codes. 197 * @stable ICU 4.8 198 */ 199 UCOL_REORDER_CODE_LIMIT = 0x1005 200} UColReorderCode; 201 202/** 203 * Base letter represents a primary difference. Set comparison 204 * level to UCOL_PRIMARY to ignore secondary and tertiary differences. 205 * Use this to set the strength of a Collator object. 206 * Example of primary difference, "abc" < "abd" 207 * 208 * Diacritical differences on the same base letter represent a secondary 209 * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary 210 * differences. Use this to set the strength of a Collator object. 211 * Example of secondary difference, "ä" >> "a". 212 * 213 * Uppercase and lowercase versions of the same character represents a 214 * tertiary difference. Set comparison level to UCOL_TERTIARY to include 215 * all comparison differences. Use this to set the strength of a Collator 216 * object. 217 * Example of tertiary difference, "abc" <<< "ABC". 218 * 219 * Two characters are considered "identical" when they have the same 220 * unicode spellings. UCOL_IDENTICAL. 221 * For example, "ä" == "ä". 222 * 223 * UCollationStrength is also used to determine the strength of sort keys 224 * generated from UCollator objects 225 * These values can be now found in the UColAttributeValue enum. 
226 * @stable ICU 2.0 227 **/ 228typedef UColAttributeValue UCollationStrength; 229 230/** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT 231 * value, as well as the values specific to each one. 232 * @stable ICU 2.0 233 */ 234typedef enum { 235 /** Attribute for direction of secondary weights - used in Canadian French. 236 * Acceptable values are UCOL_ON, which results in secondary weights 237 * being considered backwards and UCOL_OFF which treats secondary 238 * weights in the order they appear. 239 * @stable ICU 2.0 240 */ 241 UCOL_FRENCH_COLLATION, 242 /** Attribute for handling variable elements. 243 * Acceptable values are UCOL_NON_IGNORABLE (default) 244 * which treats all the codepoints with non-ignorable 245 * primary weights in the same way, 246 * and UCOL_SHIFTED which causes codepoints with primary 247 * weights that are equal or below the variable top value 248 * to be ignored on primary level and moved to the quaternary 249 * level. 250 * @stable ICU 2.0 251 */ 252 UCOL_ALTERNATE_HANDLING, 253 /** Controls the ordering of upper and lower case letters. 254 * Acceptable values are UCOL_OFF (default), which orders 255 * upper and lower case letters in accordance to their tertiary 256 * weights, UCOL_UPPER_FIRST which forces upper case letters to 257 * sort before lower case letters, and UCOL_LOWER_FIRST which does 258 * the opposite. 259 * @stable ICU 2.0 260 */ 261 UCOL_CASE_FIRST, 262 /** Controls whether an extra case level (positioned before the third 263 * level) is generated or not. Acceptable values are UCOL_OFF (default), 264 * when case level is not generated, and UCOL_ON which causes the case 265 * level to be generated. Contents of the case level are affected by 266 * the value of UCOL_CASE_FIRST attribute. A simple way to ignore 267 * accent differences in a string is to set the strength to UCOL_PRIMARY 268 * and enable case level. 
269 * @stable ICU 2.0 270 */ 271 UCOL_CASE_LEVEL, 272 /** Controls whether the normalization check and necessary normalizations 273 * are performed. When set to UCOL_OFF (default) no normalization check 274 * is performed. The correctness of the result is guaranteed only if the 275 * input data is in so-called FCD form (see users manual for more info). 276 * When set to UCOL_ON, an incremental check is performed to see whether 277 * the input data is in the FCD form. If the data is not in the FCD form, 278 * incremental NFD normalization is performed. 279 * @stable ICU 2.0 280 */ 281 UCOL_NORMALIZATION_MODE, 282 /** An alias for UCOL_NORMALIZATION_MODE attribute. 283 * @stable ICU 2.0 284 */ 285 UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, 286 /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, 287 * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength 288 * for most locales (except Japanese) is tertiary. 289 * 290 * Quaternary strength 291 * is useful when combined with shifted setting for alternate handling 292 * attribute and for JIS X 4061 collation, when it is used to distinguish 293 * between Katakana and Hiragana. 294 * Otherwise, quaternary level 295 * is affected only by the number of non-ignorable code points in 296 * the string. 297 * 298 * Identical strength is rarely useful, as it amounts 299 * to codepoints of the NFD form of the string. 300 * @stable ICU 2.0 301 */ 302 UCOL_STRENGTH, 303#ifndef U_HIDE_DEPRECATED_API 304 /** When turned on, this attribute positions Hiragana before all 305 * non-ignorables on quaternary level This is a sneaky way to produce JIS 306 * sort order. 307 * 308 * This attribute was an implementation detail of the CLDR Japanese tailoring. 309 * Since ICU 50, this attribute is not settable any more via API functions. 310 * Since CLDR 25/ICU 53, explicit quaternary relations are used 311 * to achieve the same Japanese sort order. 
312 * 313 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 314 */ 315 UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1, 316#endif /* U_HIDE_DEPRECATED_API */ 317 /** 318 * When turned on, this attribute makes 319 * substrings of digits sort according to their numeric values. 320 * 321 * This is a way to get '100' to sort AFTER '2'. Note that the longest 322 * digit substring that can be treated as a single unit is 323 * 254 digits (not counting leading zeros). If a digit substring is 324 * longer than that, the digits beyond the limit will be treated as a 325 * separate digit substring. 326 * 327 * A "digit" in this sense is a code point with General_Category=Nd, 328 * which does not include circled numbers, roman numerals, etc. 329 * Only a contiguous digit substring is considered, that is, 330 * non-negative integers without separators. 331 * There is no support for plus/minus signs, decimals, exponents, etc. 332 * 333 * @stable ICU 2.8 334 */ 335 UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, 336 /** 337 * The number of UColAttribute constants. 338 * @stable ICU 2.0 339 */ 340 UCOL_ATTRIBUTE_COUNT 341} UColAttribute; 342 343/** Options for retrieving the rule string 344 * @stable ICU 2.0 345 */ 346typedef enum { 347 /** 348 * Retrieves the tailoring rules only. 349 * Same as calling the version of getRules() without UColRuleOption. 350 * @stable ICU 2.0 351 */ 352 UCOL_TAILORING_ONLY, 353 /** 354 * Retrieves the "UCA rules" concatenated with the tailoring rules. 355 * The "UCA rules" are an <i>approximation</i> of the root collator's sort order. 356 * They are almost never used or useful at runtime and can be removed from the data. 357 * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales 358 * @stable ICU 2.0 359 */ 360 UCOL_FULL_RULES 361} UColRuleOption ; 362 363/** 364 * Open a UCollator for comparing strings. 
365 * 366 * For some languages, multiple collation types are available; 367 * for example, "de@collation=phonebook". 368 * Starting with ICU 54, collation attributes can be specified via locale keywords as well, 369 * in the old locale extension syntax ("el@colCaseFirst=upper") 370 * or in language tag syntax ("el-u-kf-upper"). 371 * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>. 372 * 373 * The UCollator pointer is used in all the calls to the Collation 374 * service. After finished, collator must be disposed of by calling 375 * {@link #ucol_close }. 376 * @param loc The locale containing the required collation rules. 377 * Special values for locales can be passed in - 378 * if NULL is passed for the locale, the default locale 379 * collation rules will be used. If empty string ("") or 380 * "root" are passed, the root collator will be returned. 381 * @param status A pointer to a UErrorCode to receive any errors 382 * @return A pointer to a UCollator, or 0 if an error occurred. 383 * @see ucol_openRules 384 * @see ucol_safeClone 385 * @see ucol_close 386 * @stable ICU 2.0 387 */ 388U_STABLE UCollator* U_EXPORT2 389ucol_open(const char *loc, UErrorCode *status); 390 391/** 392 * Produce a UCollator instance according to the rules supplied. 393 * The rules are used to change the default ordering, defined in the 394 * UCA in a process called tailoring. The resulting UCollator pointer 395 * can be used in the same way as the one obtained by {@link #ucol_strcoll }. 396 * @param rules A string describing the collation rules. For the syntax 397 * of the rules please see users guide. 398 * @param rulesLength The length of rules, or -1 if null-terminated. 
399 * @param normalizationMode The normalization mode: One of 400 * UCOL_OFF (expect the text to not need normalization), 401 * UCOL_ON (normalize), or 402 * UCOL_DEFAULT (set the mode according to the rules) 403 * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 404 * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules. 405 * @param parseError A pointer to UParseError to recieve information about errors 406 * occurred during parsing. This argument can currently be set 407 * to NULL, but at users own risk. Please provide a real structure. 408 * @param status A pointer to a UErrorCode to receive any errors 409 * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case 410 * of error - please use status argument to check for errors. 411 * @see ucol_open 412 * @see ucol_safeClone 413 * @see ucol_close 414 * @stable ICU 2.0 415 */ 416U_STABLE UCollator* U_EXPORT2 417ucol_openRules( const UChar *rules, 418 int32_t rulesLength, 419 UColAttributeValue normalizationMode, 420 UCollationStrength strength, 421 UParseError *parseError, 422 UErrorCode *status); 423 424/** 425 * Open a collator defined by a short form string. 426 * The structure and the syntax of the string is defined in the "Naming collators" 427 * section of the users guide: 428 * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme 429 * Attributes are overriden by the subsequent attributes. So, for "S2_S3", final 430 * strength will be 3. 3066bis locale overrides individual locale parts. 431 * The call to this function is equivalent to a call to ucol_open, followed by a 432 * series of calls to ucol_setAttribute and ucol_setVariableTop. 433 * @param definition A short string containing a locale and a set of attributes. 434 * Attributes not explicitly mentioned are left at the default 435 * state for a locale. 
436 * @param parseError if not NULL, structure that will get filled with error's pre 437 * and post context in case of error. 438 * @param forceDefaults if FALSE, the settings that are the same as the collator 439 * default settings will not be applied (for example, setting 440 * French secondary on a French collator would not be executed). 441 * If TRUE, all the settings will be applied regardless of the 442 * collator default value. If the definition 443 * strings are to be cached, should be set to FALSE. 444 * @param status Error code. Apart from regular error conditions connected to 445 * instantiating collators (like out of memory or similar), this 446 * API will return an error if an invalid attribute or attribute/value 447 * combination is specified. 448 * @return A pointer to a UCollator or 0 if an error occured (including an 449 * invalid attribute). 450 * @see ucol_open 451 * @see ucol_setAttribute 452 * @see ucol_setVariableTop 453 * @see ucol_getShortDefinitionString 454 * @see ucol_normalizeShortDefinitionString 455 * @stable ICU 3.0 456 * 457 */ 458U_STABLE UCollator* U_EXPORT2 459ucol_openFromShortString( const char *definition, 460 UBool forceDefaults, 461 UParseError *parseError, 462 UErrorCode *status); 463 464#ifndef U_HIDE_DEPRECATED_API 465/** 466 * Get a set containing the contractions defined by the collator. The set includes 467 * both the root collator's contractions and the contractions defined by the collator. This set 468 * will contain only strings. If a tailoring explicitly suppresses contractions from 469 * the root collator (like Russian), removed contractions will not be in the resulting set. 470 * @param coll collator 471 * @param conts the set to hold the result. It gets emptied before 472 * contractions are added. 
473 * @param status to hold the error code 474 * @return the size of the contraction set 475 * 476 * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead 477 */ 478U_DEPRECATED int32_t U_EXPORT2 479ucol_getContractions( const UCollator *coll, 480 USet *conts, 481 UErrorCode *status); 482#endif /* U_HIDE_DEPRECATED_API */ 483 484/** 485 * Get a set containing the expansions defined by the collator. The set includes 486 * both the root collator's expansions and the expansions defined by the tailoring 487 * @param coll collator 488 * @param contractions if not NULL, the set to hold the contractions 489 * @param expansions if not NULL, the set to hold the expansions 490 * @param addPrefixes add the prefix contextual elements to contractions 491 * @param status to hold the error code 492 * 493 * @stable ICU 3.4 494 */ 495U_STABLE void U_EXPORT2 496ucol_getContractionsAndExpansions( const UCollator *coll, 497 USet *contractions, USet *expansions, 498 UBool addPrefixes, UErrorCode *status); 499 500/** 501 * Close a UCollator. 502 * Once closed, a UCollator should not be used. Every open collator should 503 * be closed. Otherwise, a memory leak will result. 504 * @param coll The UCollator to close. 505 * @see ucol_open 506 * @see ucol_openRules 507 * @see ucol_safeClone 508 * @stable ICU 2.0 509 */ 510U_STABLE void U_EXPORT2 511ucol_close(UCollator *coll); 512 513#if U_SHOW_CPLUSPLUS_API 514 515U_NAMESPACE_BEGIN 516 517/** 518 * \class LocalUCollatorPointer 519 * "Smart pointer" class, closes a UCollator via ucol_close(). 520 * For most methods see the LocalPointerBase base class. 521 * 522 * @see LocalPointerBase 523 * @see LocalPointer 524 * @stable ICU 4.4 525 */ 526U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close); 527 528U_NAMESPACE_END 529 530#endif 531 532/** 533 * Compare two strings. 534 * The strings will be compared using the options already specified. 535 * @param coll The UCollator containing the comparison rules. 
536 * @param source The source string. 537 * @param sourceLength The length of source, or -1 if null-terminated. 538 * @param target The target string. 539 * @param targetLength The length of target, or -1 if null-terminated. 540 * @return The result of comparing the strings; one of UCOL_EQUAL, 541 * UCOL_GREATER, UCOL_LESS 542 * @see ucol_greater 543 * @see ucol_greaterOrEqual 544 * @see ucol_equal 545 * @stable ICU 2.0 546 */ 547U_STABLE UCollationResult U_EXPORT2 548ucol_strcoll( const UCollator *coll, 549 const UChar *source, 550 int32_t sourceLength, 551 const UChar *target, 552 int32_t targetLength); 553 554/** 555* Compare two strings in UTF-8. 556* The strings will be compared using the options already specified. 557* Note: When input string contains malformed a UTF-8 byte sequence, 558* this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD). 559* @param coll The UCollator containing the comparison rules. 560* @param source The source UTF-8 string. 561* @param sourceLength The length of source, or -1 if null-terminated. 562* @param target The target UTF-8 string. 563* @param targetLength The length of target, or -1 if null-terminated. 564* @param status A pointer to a UErrorCode to receive any errors 565* @return The result of comparing the strings; one of UCOL_EQUAL, 566* UCOL_GREATER, UCOL_LESS 567* @see ucol_greater 568* @see ucol_greaterOrEqual 569* @see ucol_equal 570* @stable ICU 50 571*/ 572U_STABLE UCollationResult U_EXPORT2 573ucol_strcollUTF8( 574 const UCollator *coll, 575 const char *source, 576 int32_t sourceLength, 577 const char *target, 578 int32_t targetLength, 579 UErrorCode *status); 580 581/** 582 * Determine if one string is greater than another. 583 * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER 584 * @param coll The UCollator containing the comparison rules. 585 * @param source The source string. 586 * @param sourceLength The length of source, or -1 if null-terminated. 
587 * @param target The target string. 588 * @param targetLength The length of target, or -1 if null-terminated. 589 * @return TRUE if source is greater than target, FALSE otherwise. 590 * @see ucol_strcoll 591 * @see ucol_greaterOrEqual 592 * @see ucol_equal 593 * @stable ICU 2.0 594 */ 595U_STABLE UBool U_EXPORT2 596ucol_greater(const UCollator *coll, 597 const UChar *source, int32_t sourceLength, 598 const UChar *target, int32_t targetLength); 599 600/** 601 * Determine if one string is greater than or equal to another. 602 * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS 603 * @param coll The UCollator containing the comparison rules. 604 * @param source The source string. 605 * @param sourceLength The length of source, or -1 if null-terminated. 606 * @param target The target string. 607 * @param targetLength The length of target, or -1 if null-terminated. 608 * @return TRUE if source is greater than or equal to target, FALSE otherwise. 609 * @see ucol_strcoll 610 * @see ucol_greater 611 * @see ucol_equal 612 * @stable ICU 2.0 613 */ 614U_STABLE UBool U_EXPORT2 615ucol_greaterOrEqual(const UCollator *coll, 616 const UChar *source, int32_t sourceLength, 617 const UChar *target, int32_t targetLength); 618 619/** 620 * Compare two strings for equality. 621 * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL 622 * @param coll The UCollator containing the comparison rules. 623 * @param source The source string. 624 * @param sourceLength The length of source, or -1 if null-terminated. 625 * @param target The target string. 626 * @param targetLength The length of target, or -1 if null-terminated. 
627 * @return TRUE if source is equal to target, FALSE otherwise 628 * @see ucol_strcoll 629 * @see ucol_greater 630 * @see ucol_greaterOrEqual 631 * @stable ICU 2.0 632 */ 633U_STABLE UBool U_EXPORT2 634ucol_equal(const UCollator *coll, 635 const UChar *source, int32_t sourceLength, 636 const UChar *target, int32_t targetLength); 637 638/** 639 * Compare two UTF-8 encoded trings. 640 * The strings will be compared using the options already specified. 641 * @param coll The UCollator containing the comparison rules. 642 * @param sIter The source string iterator. 643 * @param tIter The target string iterator. 644 * @return The result of comparing the strings; one of UCOL_EQUAL, 645 * UCOL_GREATER, UCOL_LESS 646 * @param status A pointer to a UErrorCode to receive any errors 647 * @see ucol_strcoll 648 * @stable ICU 2.6 649 */ 650U_STABLE UCollationResult U_EXPORT2 651ucol_strcollIter( const UCollator *coll, 652 UCharIterator *sIter, 653 UCharIterator *tIter, 654 UErrorCode *status); 655 656/** 657 * Get the collation strength used in a UCollator. 658 * The strength influences how strings are compared. 659 * @param coll The UCollator to query. 660 * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 661 * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL 662 * @see ucol_setStrength 663 * @stable ICU 2.0 664 */ 665U_STABLE UCollationStrength U_EXPORT2 666ucol_getStrength(const UCollator *coll); 667 668/** 669 * Set the collation strength used in a UCollator. 670 * The strength influences how strings are compared. 671 * @param coll The UCollator to set. 672 * @param strength The desired collation strength; one of UCOL_PRIMARY, 673 * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT 674 * @see ucol_getStrength 675 * @stable ICU 2.0 676 */ 677U_STABLE void U_EXPORT2 678ucol_setStrength(UCollator *coll, 679 UCollationStrength strength); 680 681/** 682 * Retrieves the reordering codes for this collator. 
683 * These reordering codes are a combination of UScript codes and UColReorderCode entries. 684 * @param coll The UCollator to query. 685 * @param dest The array to fill with the script ordering. 686 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 687 * will only return the length of the result without writing any of the result string (pre-flighting). 688 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 689 * failure before the function call. 690 * @return The number of reordering codes written to the dest array. 691 * @see ucol_setReorderCodes 692 * @see ucol_getEquivalentReorderCodes 693 * @see UScriptCode 694 * @see UColReorderCode 695 * @stable ICU 4.8 696 */ 697U_STABLE int32_t U_EXPORT2 698ucol_getReorderCodes(const UCollator* coll, 699 int32_t* dest, 700 int32_t destCapacity, 701 UErrorCode *pErrorCode); 702/** 703 * Sets the reordering codes for this collator. 704 * Collation reordering allows scripts and some other defined blocks of characters 705 * to be moved relative to each other as a block. This reordering is done on top of 706 * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed 707 * at the start and/or the end of the collation order. These groups are specified using 708 * UScript codes and UColReorderCode entries. 709 * <p>By default, reordering codes specified for the start of the order are placed in the 710 * order given after a group of "special" non-script blocks. These special groups of characters 711 * are space, punctuation, symbol, currency, and digit. These special groups are represented with 712 * UColReorderCode entries. Script groups can be intermingled with 713 * these special non-script blocks if those special blocks are explicitly specified in the reordering. 714 * <p>The special code OTHERS stands for any script that is not explicitly 715 * mentioned in the list of reordering codes given. 
Anything that is after OTHERS 716 * will go at the very end of the reordering in the order given. 717 * <p>The special reorder code DEFAULT will reset the reordering for this collator 718 * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that 719 * was specified when this collator was created from resource data or from rules. The 720 * DEFAULT code <b>must</b> be the sole code supplied when it used. If not 721 * that will result in a U_ILLEGAL_ARGUMENT_ERROR being set. 722 * <p>The special reorder code NONE will remove any reordering for this collator. 723 * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The 724 * NONE code <b>must</b> be the sole code supplied when it used. 725 * @param coll The UCollator to set. 726 * @param reorderCodes An array of script codes in the new order. This can be NULL if the 727 * length is also set to 0. An empty array will clear any reordering codes on the collator. 728 * @param reorderCodesLength The length of reorderCodes. 729 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 730 * failure before the function call. 731 * @see ucol_getReorderCodes 732 * @see ucol_getEquivalentReorderCodes 733 * @see UScriptCode 734 * @see UColReorderCode 735 * @stable ICU 4.8 736 */ 737U_STABLE void U_EXPORT2 738ucol_setReorderCodes(UCollator* coll, 739 const int32_t* reorderCodes, 740 int32_t reorderCodesLength, 741 UErrorCode *pErrorCode); 742 743/** 744 * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder 745 * codes will be grouped and must reorder together. 746 * @param reorderCode The reorder code to determine equivalence for. 747 * @param dest The array to fill with the script ordering. 748 * @param destCapacity The length of dest. 
If it is 0, then dest may be NULL and the function 749 * will only return the length of the result without writing any of the result string (pre-flighting). 750 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate 751 * a failure before the function call. 752 * @return The number of reordering codes written to the dest array. 753 * @see ucol_setReorderCodes 754 * @see ucol_getReorderCodes 755 * @see UScriptCode 756 * @see UColReorderCode 757 * @stable ICU 4.8 758 */ 759U_STABLE int32_t U_EXPORT2 760ucol_getEquivalentReorderCodes(int32_t reorderCode, 761 int32_t* dest, 762 int32_t destCapacity, 763 UErrorCode *pErrorCode); 764 765/** 766 * Get the display name for a UCollator. 767 * The display name is suitable for presentation to a user. 768 * @param objLoc The locale of the collator in question. 769 * @param dispLoc The locale for display. 770 * @param result A pointer to a buffer to receive the attribute. 771 * @param resultLength The maximum size of result. 772 * @param status A pointer to a UErrorCode to receive any errors 773 * @return The total buffer size needed; if greater than resultLength, 774 * the output was truncated. 775 * @stable ICU 2.0 776 */ 777U_STABLE int32_t U_EXPORT2 778ucol_getDisplayName( const char *objLoc, 779 const char *dispLoc, 780 UChar *result, 781 int32_t resultLength, 782 UErrorCode *status); 783 784/** 785 * Get a locale for which collation rules are available. 786 * A UCollator in a locale returned by this function will perform the correct 787 * collation for the locale. 788 * @param localeIndex The index of the desired locale. 789 * @return A locale for which collation rules are available, or 0 if none. 790 * @see ucol_countAvailable 791 * @stable ICU 2.0 792 */ 793U_STABLE const char* U_EXPORT2 794ucol_getAvailable(int32_t localeIndex); 795 796/** 797 * Determine how many locales have collation rules available. 
798 * This function is most useful as determining the loop ending condition for 799 * calls to {@link #ucol_getAvailable }. 800 * @return The number of locales for which collation rules are available. 801 * @see ucol_getAvailable 802 * @stable ICU 2.0 803 */ 804U_STABLE int32_t U_EXPORT2 805ucol_countAvailable(void); 806 807#if !UCONFIG_NO_SERVICE 808/** 809 * Create a string enumerator of all locales for which a valid 810 * collator may be opened. 811 * @param status input-output error code 812 * @return a string enumeration over locale strings. The caller is 813 * responsible for closing the result. 814 * @stable ICU 3.0 815 */ 816U_STABLE UEnumeration* U_EXPORT2 817ucol_openAvailableLocales(UErrorCode *status); 818#endif 819 820/** 821 * Create a string enumerator of all possible keywords that are relevant to 822 * collation. At this point, the only recognized keyword for this 823 * service is "collation". 824 * @param status input-output error code 825 * @return a string enumeration over keyword strings. The caller is 826 * responsible for closing the result. 827 * @stable ICU 3.0 828 */ 829U_STABLE UEnumeration* U_EXPORT2 830ucol_getKeywords(UErrorCode *status); 831 832/** 833 * Given a keyword, create a string enumeration of all values 834 * for that keyword that are currently in use. 835 * @param keyword a particular keyword as enumerated by 836 * ucol_getKeywords. If any other keyword is passed in, *status is set 837 * to U_ILLEGAL_ARGUMENT_ERROR. 838 * @param status input-output error code 839 * @return a string enumeration over collation keyword values, or NULL 840 * upon error. The caller is responsible for closing the result. 841 * @stable ICU 3.0 842 */ 843U_STABLE UEnumeration* U_EXPORT2 844ucol_getKeywordValues(const char *keyword, UErrorCode *status); 845 846/** 847 * Given a key and a locale, returns an array of string values in a preferred 848 * order that would make a difference. 
These are all and only those values where 849 * the open (creation) of the service with the locale formed from the input locale 850 * plus input keyword and that value has different behavior than creation with the 851 * input locale alone. 852 * @param key one of the keys supported by this service. For now, only 853 * "collation" is supported. 854 * @param locale the locale 855 * @param commonlyUsed if set to true it will return only commonly used values 856 * with the given locale in preferred order. Otherwise, 857 * it will return all the available values for the locale. 858 * @param status error status 859 * @return a string enumeration over keyword values for the given key and the locale. 860 * @stable ICU 4.2 861 */ 862U_STABLE UEnumeration* U_EXPORT2 863ucol_getKeywordValuesForLocale(const char* key, 864 const char* locale, 865 UBool commonlyUsed, 866 UErrorCode* status); 867 868/** 869 * Return the functionally equivalent locale for the given 870 * requested locale, with respect to given keyword, for the 871 * collation service. If two locales return the same result, then 872 * collators instantiated for these locales will behave 873 * equivalently. The converse is not always true; two collators 874 * may in fact be equivalent, but return different results, due to 875 * internal details. The return result has no other meaning than 876 * that stated above, and implies nothing as to the relationship 877 * between the two locales. This is intended for use by 878 * applications who wish to cache collators, or otherwise reuse 879 * collators when possible. The functional equivalent may change 880 * over time. For more information, please see the <a 881 * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services"> 882 * Locales and Services</a> section of the ICU User Guide. 
883 * @param result fillin for the functionally equivalent locale 884 * @param resultCapacity capacity of the fillin buffer 885 * @param keyword a particular keyword as enumerated by 886 * ucol_getKeywords. 887 * @param locale the requested locale 888 * @param isAvailable if non-NULL, pointer to a fillin parameter that 889 * indicates whether the requested locale was 'available' to the 890 * collation service. A locale is defined as 'available' if it 891 * physically exists within the collation locale data. 892 * @param status pointer to input-output error code 893 * @return the actual buffer size needed for the locale. If greater 894 * than resultCapacity, the returned full name will be truncated and 895 * an error code will be returned. 896 * @stable ICU 3.0 897 */ 898U_STABLE int32_t U_EXPORT2 899ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 900 const char* keyword, const char* locale, 901 UBool* isAvailable, UErrorCode* status); 902 903/** 904 * Get the collation tailoring rules from a UCollator. 905 * The rules will follow the rule syntax. 906 * @param coll The UCollator to query. 907 * @param length Pointer that receives the length of the returned rules (NOTE(review): presumably counted in UChars; confirm against the implementation). 908 * @return The collation tailoring rules. 909 * @stable ICU 2.0 910 */ 911U_STABLE const UChar* U_EXPORT2 912ucol_getRules( const UCollator *coll, 913 int32_t *length); 914 915/** Get the short definition string for a collator. This API harvests the collator's 916 * locale and the attribute set and produces a string that can be used for opening 917 * a collator with the same attributes using the ucol_openFromShortString API. 918 * This string will be normalized. 919 * The structure and the syntax of the string is defined in the "Naming collators" 920 * section of the users guide: 921 * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme 922 * This API supports preflighting. 
923 * @param coll a collator 924 * @param locale a locale that will appear as a collators locale in the resulting 925 * short string definition. If NULL, the locale will be harvested 926 * from the collator. 927 * @param buffer space to hold the resulting string 928 * @param capacity capacity of the buffer 929 * @param status for returning errors. All the preflighting errors are featured 930 * @return length of the resulting string 931 * @see ucol_openFromShortString 932 * @see ucol_normalizeShortDefinitionString 933 * @stable ICU 3.0 934 */ 935U_STABLE int32_t U_EXPORT2 936ucol_getShortDefinitionString(const UCollator *coll, 937 const char *locale, 938 char *buffer, 939 int32_t capacity, 940 UErrorCode *status); 941 942/** Verifies and normalizes a short definition string. 943 * A normalized short definition string has all the options sorted by the argument name, 944 * so that equivalent definition strings are the same. 945 * This API supports preflighting. 946 * @param source definition string 947 * @param destination space to hold the resulting string 948 * @param capacity capacity of the buffer 949 * @param parseError if not NULL, structure that will get filled with the error's pre- 950 * and post-context in case of error. 951 * @param status Error code. This API will return an error if an invalid attribute 952 * or attribute/value combination is specified. All the preflighting 953 * errors are also reported. 954 * @return length of the resulting normalized string. 955 * 956 * @see ucol_openFromShortString 957 * @see ucol_getShortDefinitionString 958 * 959 * @stable ICU 3.0 960 */ 961 962U_STABLE int32_t U_EXPORT2 963ucol_normalizeShortDefinitionString(const char *source, 964 char *destination, 965 int32_t capacity, 966 UParseError *parseError, 967 UErrorCode *status); 968 969 970/** 971 * Get a sort key for a string from a UCollator. 972 * Sort keys may be compared using <TT>strcmp</TT>. 
973 * 974 * Like ICU functions that write to an output buffer, the buffer contents 975 * are undefined if the buffer capacity (resultLength parameter) is too small. 976 * Unlike ICU functions that write a string to an output buffer, 977 * the terminating zero byte is counted in the sort key length. 978 * @param coll The UCollator containing the collation rules. 979 * @param source The string to transform. 980 * @param sourceLength The length of source, or -1 if null-terminated. 981 * @param result A pointer to a buffer to receive the sort key. 982 * @param resultLength The maximum size of result. 983 * @return The size needed to fully store the sort key. 984 * If there was an internal error generating the sort key, 985 * a zero value is returned. 986 * @see ucol_keyHashCode 987 * @stable ICU 2.0 988 */ 989U_STABLE int32_t U_EXPORT2 990ucol_getSortKey(const UCollator *coll, 991 const UChar *source, 992 int32_t sourceLength, 993 uint8_t *result, 994 int32_t resultLength); 995 996 997/** Gets the next count bytes of a sort key. Caller needs 998 * to preserve state array between calls and to provide 999 * the same type of UCharIterator set with the same string. 1000 * The destination buffer provided must be big enough to store 1001 * the number of requested bytes. 1002 * 1003 * The generated sort key may or may not be compatible with 1004 * sort keys generated using ucol_getSortKey(). 1005 * @param coll The UCollator containing the collation rules. 1006 * @param iter UCharIterator containing the string we need 1007 * the sort key to be calculated for. 1008 * @param state Opaque state of sortkey iteration. 1009 * @param dest Buffer to hold the resulting sortkey part 1010 * @param count number of sort key bytes required. 1011 * @param status error code indicator. 1012 * @return the actual number of bytes of a sortkey. It can be 1013 * smaller than count if we have reached the end of 1014 * the sort key. 
1015 * @stable ICU 2.6 1016 */ 1017U_STABLE int32_t U_EXPORT2 1018ucol_nextSortKeyPart(const UCollator *coll, 1019 UCharIterator *iter, 1020 uint32_t state[2], 1021 uint8_t *dest, int32_t count, 1022 UErrorCode *status); 1023 1024/** Bound modes accepted by the ucol_getBound API. 1025 * See the ucol_getBound documentation below for an explanation. 1026 * Do not change the values assigned to the 1027 * members of this enum: the underlying code 1028 * depends on them having these numbers. 1029 * @stable ICU 2.0 1030 */ 1031typedef enum { 1032 /** lower bound */ 1033 UCOL_BOUND_LOWER = 0, 1034 /** upper bound that will match strings of exact size */ 1035 UCOL_BOUND_UPPER = 1, 1036 /** upper bound that will match all the strings that have the same initial substring as the given string */ 1037 UCOL_BOUND_UPPER_LONG = 2, /** count of the bound modes above (implicitly 3, following UCOL_BOUND_UPPER_LONG = 2) */ 1038 UCOL_BOUND_VALUE_COUNT 1039} UColBoundMode; 1040 1041/** 1042 * Produce a bound for a given sortkey and a number of levels. 1043 * Return value is always the number of bytes needed, regardless of 1044 * whether the result buffer was big enough or even valid.<br> 1045 * Resulting bounds can be used to produce a range of strings that are 1046 * between upper and lower bounds. For example, if bounds are produced 1047 * for a sortkey of string "smith", strings between upper and lower 1048 * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> 1049 * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER 1050 * is produced, strings matched would be as above. However, if bound 1051 * produced using UCOL_BOUND_UPPER_LONG is used, the above example will 1052 * also match "Smithsonian" and similar.<br> 1053 * For more on usage, see example in cintltst/capitst.c in procedure 1054 * TestBounds. 1055 * Sort keys may be compared using <TT>strcmp</TT>. 1056 * @param source The source sortkey. 1057 * @param sourceLength The length of source, or -1 if null-terminated. 1058 * (If an unmodified sortkey is passed, it is always null 1059 * terminated). 
1060 * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 1061 * produces a lower inclusive bound, UCOL_BOUND_UPPER, that 1062 * produces upper bound that matches strings of the same length 1063 * or UCOL_BOUND_UPPER_LONG that matches strings that have the 1064 * same starting substring as the source string. 1065 * @param noOfLevels Number of levels required in the resulting bound (for most 1066 * uses, the recommended value is 1). See users guide for 1067 * explanation on number of levels a sortkey can have. 1068 * @param result A pointer to a buffer to receive the resulting sortkey. 1069 * @param resultLength The maximum size of result. 1070 * @param status Used for returning error code if something went wrong. If the 1071 * number of levels requested is higher than the number of levels 1072 * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 1073 * issued. 1074 * @return The size needed to fully store the bound. 1075 * @see ucol_keyHashCode 1076 * @stable ICU 2.1 1077 */ 1078U_STABLE int32_t U_EXPORT2 1079ucol_getBound(const uint8_t *source, 1080 int32_t sourceLength, 1081 UColBoundMode boundType, 1082 uint32_t noOfLevels, 1083 uint8_t *result, 1084 int32_t resultLength, 1085 UErrorCode *status); 1086 1087/** 1088 * Gets the version information for a Collator. Version is currently 1089 * an opaque 32-bit number which depends, among other things, on major 1090 * versions of the collator tailoring and UCA. 1091 * @param coll The UCollator to query. 1092 * @param info the version # information, the result will be filled in 1093 * @stable ICU 2.0 1094 */ 1095U_STABLE void U_EXPORT2 1096ucol_getVersion(const UCollator* coll, UVersionInfo info); 1097 1098/** 1099 * Gets the UCA version information for a Collator. Version is the 1100 * UCA version number (3.1.1, 4.0). 1101 * @param coll The UCollator to query. 
1102 * @param info the version # information, the result will be filled in 1103 * @stable ICU 2.8 1104 */ 1105U_STABLE void U_EXPORT2 1106ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); 1107 1108/** 1109 * Merges two sort keys. The levels are merged with their corresponding counterparts 1110 * (primaries with primaries, secondaries with secondaries etc.). Between the values 1111 * from the same level a separator is inserted. 1112 * 1113 * This is useful, for example, for combining sort keys from first and last names 1114 * to sort such pairs. 1115 * It is possible to merge multiple sort keys by consecutively merging 1116 * another one with the intermediate result. 1117 * 1118 * The length of the merge result is the sum of the lengths of the input sort keys. 1119 * 1120 * Example (uncompressed): 1121 * <pre>191B1D 01 050505 01 910505 00 1122 * 1F2123 01 050505 01 910505 00</pre> 1123 * will be merged as 1124 * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre> 1125 * 1126 * If the destination buffer is not big enough, then its contents are undefined. 1127 * If any of source lengths are zero or any of the source pointers are NULL/undefined, 1128 * the result is of size zero. 
1129 * 1130 * @param src1 the first sort key 1131 * @param src1Length the length of the first sort key, including the zero byte at the end; 1132 * can be -1 if the function is to find the length 1133 * @param src2 the second sort key 1134 * @param src2Length the length of the second sort key, including the zero byte at the end; 1135 * can be -1 if the function is to find the length 1136 * @param dest the buffer where the merged sort key is written, 1137 * can be NULL if destCapacity==0 1138 * @param destCapacity the number of bytes in the dest buffer 1139 * @return the length of the merged sort key, src1Length+src2Length; 1140 * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), 1141 * in which cases the contents of dest is undefined 1142 * @stable ICU 2.0 1143 */ 1144U_STABLE int32_t U_EXPORT2 1145ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, 1146 const uint8_t *src2, int32_t src2Length, 1147 uint8_t *dest, int32_t destCapacity); 1148 1149/** 1150 * Universal attribute setter 1151 * @param coll collator whose attributes are to be changed 1152 * @param attr attribute type 1153 * @param value attribute value 1154 * @param status to indicate whether the operation went on smoothly or there were errors 1155 * @see UColAttribute 1156 * @see UColAttributeValue 1157 * @see ucol_getAttribute 1158 * @stable ICU 2.0 1159 */ 1160U_STABLE void U_EXPORT2 1161ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); 1162 1163/** 1164 * Universal attribute getter 1165 * @param coll collator whose attributes are to be queried 1166 * @param attr attribute type 1167 * @param status to indicate whether the operation went on smoothly or there were errors 1168 * @return attribute value 1169 * @see UColAttribute 1170 * @see UColAttributeValue 1171 * @see ucol_setAttribute 1172 * @stable ICU 2.0 1173 */ 1174U_STABLE UColAttributeValue U_EXPORT2 1175ucol_getAttribute(const UCollator *coll, 
UColAttribute attr, UErrorCode *status); 1176 1177#ifndef U_HIDE_DRAFT_API 1178 1179/** 1180 * Sets the variable top to the top of the specified reordering group. 1181 * The variable top determines the highest-sorting character 1182 * which is affected by UCOL_ALTERNATE_HANDLING. 1183 * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. 1184 * @param coll the collator 1185 * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, 1186 * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; 1187 * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group 1188 * @param pErrorCode Standard ICU error code. Its input value must 1189 * pass the U_SUCCESS() test, or else the function returns 1190 * immediately. Check for U_FAILURE() on output or use with 1191 * function chaining. (See User Guide for details.) 1192 * @see ucol_getMaxVariable 1193 * @draft ICU 53 1194 */ 1195U_DRAFT void U_EXPORT2 1196ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode); 1197 1198/** 1199 * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. 1200 * @param coll the collator 1201 * @return the maximum variable reordering group. 1202 * @see ucol_setMaxVariable 1203 * @draft ICU 53 1204 */ 1205U_DRAFT UColReorderCode U_EXPORT2 1206ucol_getMaxVariable(const UCollator *coll); 1207 1208#endif /* U_HIDE_DRAFT_API */ 1209 1210/** 1211 * Sets the variable top to the primary weight of the specified string. 1212 * 1213 * Beginning with ICU 53, the variable top is pinned to 1214 * the top of one of the supported reordering groups, 1215 * and it must not be beyond the last of those groups. 1216 * See ucol_setMaxVariable(). 1217 * @param coll the collator 1218 * @param varTop one or more (if contraction) UChars to which the variable top should be set 1219 * @param len length of variable top string. If -1 it is considered to be zero terminated. 
1220 * @param status error code. If error code is set, the return value is undefined. 1221 * Errors set by this function are:<br> 1222 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> 1223 * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond 1224 * the last reordering group supported by ucol_setMaxVariable() 1225 * @return variable top primary weight 1226 * @see ucol_getVariableTop 1227 * @see ucol_restoreVariableTop 1228 * @deprecated ICU 53 Call ucol_setMaxVariable() instead. 1229 */ 1230U_DEPRECATED uint32_t U_EXPORT2 1231ucol_setVariableTop(UCollator *coll, 1232 const UChar *varTop, int32_t len, 1233 UErrorCode *status); 1234 1235/** 1236 * Gets the variable top value of a Collator. 1237 * @param coll collator whose variable top needs to be retrieved 1238 * @param status error code (not changed by function). If error code is set, 1239 * the return value is undefined. 1240 * @return the variable top primary weight 1241 * @see ucol_getMaxVariable 1242 * @see ucol_setVariableTop 1243 * @see ucol_restoreVariableTop 1244 * @stable ICU 2.0 1245 */ 1246U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status); 1247 1248/** 1249 * Sets the variable top to the specified primary weight. 1250 * 1251 * Beginning with ICU 53, the variable top is pinned to 1252 * the top of one of the supported reordering groups, 1253 * and it must not be beyond the last of those groups. 1254 * See ucol_setMaxVariable(). * @param coll collator whose variable top is to be set 1255 * @param varTop primary weight, as returned by ucol_setVariableTop or ucol_getVariableTop 1256 * @param status error code 1257 * @see ucol_getVariableTop 1258 * @see ucol_setVariableTop 1259 * @deprecated ICU 53 Call ucol_setMaxVariable() instead. 1260 */ 1261U_DEPRECATED void U_EXPORT2 1262ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status); 1263 1264/** 1265 * Thread safe cloning operation. The result is a clone of a given collator. 
1266 * @param coll collator to be cloned 1267 * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> 1268 * user allocated space for the new clone. 1269 * If NULL new memory will be allocated. 1270 * If buffer is not large enough, new memory will be allocated. 1271 * Clients can use the U_COL_SAFECLONE_BUFFERSIZE. 1272 * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> 1273 * pointer to size of allocated space. 1274 * If *pBufferSize == 0, a sufficient size for use in cloning will 1275 * be returned ('pre-flighting') 1276 * If *pBufferSize is not enough for a stack-based safe clone, 1277 * new memory will be allocated. 1278 * @param status to indicate whether the operation went on smoothly or there were errors 1279 * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any 1280 * allocations were necessary. 1281 * @return pointer to the new clone 1282 * @see ucol_open 1283 * @see ucol_openRules 1284 * @see ucol_close 1285 * @stable ICU 2.0 1286 */ 1287U_STABLE UCollator* U_EXPORT2 1288ucol_safeClone(const UCollator *coll, 1289 void *stackBuffer, 1290 int32_t *pBufferSize, 1291 UErrorCode *status); 1292 1293#ifndef U_HIDE_DEPRECATED_API 1294 1295/** default memory size for the new clone. 1296 * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer. 1297 */ 1298#define U_COL_SAFECLONE_BUFFERSIZE 1 1299 1300#endif /* U_HIDE_DEPRECATED_API */ 1301 1302/** 1303 * Returns current rules. Delta defines whether full rules are returned or just the tailoring. 1304 * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough 1305 * to store rules, will store up to available space. 1306 * 1307 * ucol_getRules() should normally be used instead. 
1308 * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales 1309 * @param coll collator to get the rules from 1310 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 1311 * @param buffer buffer to store the result in. If NULL, you'll get no rules. 1312 * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in. 1313 * @return the number of UChars needed to store the rules 1314 * @stable ICU 2.0 1315 * @see UCOL_FULL_RULES 1316 */ 1317U_STABLE int32_t U_EXPORT2 1318ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen); 1319 1320#ifndef U_HIDE_DEPRECATED_API 1321/** 1322 * gets the locale name of the collator. If the collator 1323 * is instantiated from the rules, then this function returns 1324 * NULL. 1325 * @param coll The UCollator for which the locale is needed 1326 * @param type You can choose between requested, valid and actual 1327 * locale. For description see the definition of 1328 * ULocDataLocaleType in uloc.h 1329 * @param status error code of the operation 1330 * @return real locale name from which the collation data comes. 1331 * If the collator was instantiated from rules, returns 1332 * NULL. 1333 * @deprecated ICU 2.8 Use ucol_getLocaleByType instead 1334 */ 1335U_DEPRECATED const char * U_EXPORT2 1336ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1337#endif /* U_HIDE_DEPRECATED_API */ 1338 1339/** 1340 * gets the locale name of the collator. If the collator 1341 * is instantiated from the rules, then this function returns 1342 * NULL. 1343 * @param coll The UCollator for which the locale is needed 1344 * @param type You can choose between requested, valid and actual 1345 * locale. For description see the definition of 1346 * ULocDataLocaleType in uloc.h 1347 * @param status error code of the operation 1348 * @return real locale name from which the collation data comes. 
1349 * If the collator was instantiated from rules, returns 1350 * NULL. 1351 * @stable ICU 2.8 1352 */ 1353U_STABLE const char * U_EXPORT2 1354ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1355 1356/** 1357 * Get a Unicode set that contains all the characters and sequences tailored in 1358 * this collator. The result must be disposed of by using uset_close. 1359 * @param coll The UCollator for which we want to get tailored chars 1360 * @param status error code of the operation 1361 * @return a pointer to a newly created USet. Must be disposed of by using uset_close. 1362 * @see ucol_openRules 1363 * @see uset_close 1364 * @stable ICU 2.4 1365 */ 1366U_STABLE USet * U_EXPORT2 1367ucol_getTailoredSet(const UCollator *coll, UErrorCode *status); 1368 1369#ifndef U_HIDE_INTERNAL_API 1370/** Calculates the set of unsafe code points, given a collator. 1371 * A character is unsafe if you could append any character and cause the ordering to alter significantly. 1372 * Collation sorts in normalized order, so anything that rearranges in normalization can cause this. 1373 * Thus if you have a character like a_umlaut, and you add a lower_dot to it, 1374 * then it normalizes to a_lower_dot + umlaut, and sorts differently. 1375 * @param coll Collator 1376 * @param unsafe a fill-in set to receive the unsafe points 1377 * @param status for catching errors 1378 * @return number of elements in the set 1379 * @internal ICU 3.0 1380 */ 1381U_INTERNAL int32_t U_EXPORT2 1382ucol_getUnsafeSet( const UCollator *coll, 1383 USet *unsafe, 1384 UErrorCode *status); 1385 1386/** Touches all resources needed for instantiating a collator from a short string definition, 1387 * thus filling up the cache. 1388 * @param definition A short string containing a locale and a set of attributes. 1389 * Attributes not explicitly mentioned are left at the default 1390 * state for a locale. 
1391 * @param parseError if not NULL, structure that will get filled with error's pre 1392 * and post context in case of error. 1393 * @param forceDefaults if FALSE, the settings that are the same as the collator 1394 * default settings will not be applied (for example, setting 1395 * French secondary on a French collator would not be executed). 1396 * If TRUE, all the settings will be applied regardless of the 1397 * collator default value. If the definition 1398 * strings are to be cached, should be set to FALSE. 1399 * @param status Error code. Apart from regular error conditions connected to 1400 * instantiating collators (like out of memory or similar), this 1401 * API will return an error if an invalid attribute or attribute/value 1402 * combination is specified. 1403 * @see ucol_openFromShortString 1404 * @internal ICU 3.2.1 1405 */ 1406U_INTERNAL void U_EXPORT2 1407ucol_prepareShortStringOpen( const char *definition, 1408 UBool forceDefaults, 1409 UParseError *parseError, 1410 UErrorCode *status); 1411#endif /* U_HIDE_INTERNAL_API */ 1412 1413/** Creates a binary image of a collator. This binary image can be stored and 1414 * later used to instantiate a collator using ucol_openBinary. 1415 * This API supports preflighting. 1416 * @param coll Collator 1417 * @param buffer a fill-in buffer to receive the binary image 1418 * @param capacity capacity of the destination buffer 1419 * @param status for catching errors 1420 * @return size of the image 1421 * @see ucol_openBinary 1422 * @stable ICU 3.2 1423 */ 1424U_STABLE int32_t U_EXPORT2 1425ucol_cloneBinary(const UCollator *coll, 1426 uint8_t *buffer, int32_t capacity, 1427 UErrorCode *status); 1428 1429/** Opens a collator from a collator binary image created using 1430 * ucol_cloneBinary. Binary image used in instantiation of the 1431 * collator remains owned by the user and should stay around for 1432 * the lifetime of the collator. 
The API also takes a base collator 1433 * which usually should be the root collator. 1434 * @param bin binary image owned by the user and required through the 1435 * lifetime of the collator 1436 * @param length size of the image. If negative, the API will try to 1437 * figure out the length of the image 1438 * @param base fallback collator, usually the root collator. Base is required to be 1439 * present through the lifetime of the collator. Currently 1440 * it cannot be NULL. 1441 * @param status for catching errors 1442 * @return newly created collator 1443 * @see ucol_cloneBinary 1444 * @stable ICU 3.2 1445 */ 1446U_STABLE UCollator* U_EXPORT2 1447ucol_openBinary(const uint8_t *bin, int32_t length, 1448 const UCollator *base, 1449 UErrorCode *status); 1450 1451 1452#endif /* #if !UCONFIG_NO_COLLATION */ 1453 1454#endif 1455