collationsettings.h revision 64339d36f8bd4db5025fe2988eda22b491a9219c
164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 51b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2013-2015, International Business Machines 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others. All Rights Reserved. 7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationsettings.h 9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2013feb07 11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer 12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/ 13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __COLLATIONSETTINGS_H__ 15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __COLLATIONSETTINGS_H__ 16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h" 18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ucol.h" 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h" 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "sharedobject.h" 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "umutex.h" 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN 27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertstruct CollationData; 291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/** 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Collation settings/options/attributes. 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * These are the values that can be changed via API. 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstruct U_I18N_API CollationSettings : public SharedObject { 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Options bit 0: Perform the FCD check on the input text and deliver normalized text. 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t CHECK_FCD = 1; 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Options bit 1: Numeric collation. 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Also known as CODAN = COllate Digits As Numbers. 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Treat digit sequences as numbers with CE sequences in numeric order, 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * rather than returning a normal CE for each digit. 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t NUMERIC = 2; 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * "Shifted" alternate handling, see ALTERNATE_MASK. 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t SHIFTED = 4; 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable. 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Reserve values 8 and 0xc for shift-trimmed and blanked. 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t ALTERNATE_MASK = 0xc; 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by this value. 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t MAX_VARIABLE_SHIFT = 4; 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** maxVariable options bit mask before shifting. */ 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t MAX_VARIABLE_MASK = 0x70; 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Options bit 7: Reserved/unused/0. */ 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on. 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t UPPER_FIRST = 0x100; 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Options bit 9: Keep the case bits in the tertiary weight (they trump other tertiary values) 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * unless case level is on (when they are *moved* into the separate case level). 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * By default, the case bits are removed from the tertiary weight (ignored). 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs. UCOL_UPPER_FIRST. 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t CASE_FIRST = 0x200; 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Options bit mask for caseFirst and upperFirst, before shifting. 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Same value as caseFirst==upperFirst. 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST; 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Options bit 10: Insert the case level between the secondary and tertiary levels. 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t CASE_LEVEL = 0x400; 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Options bit 11: Compare secondary weights backwards. ("French secondary") 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t BACKWARD_SECONDARY = 0x800; 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Options bits 15..12: The 4-bit strength value bit field is shifted by this value. 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * It is the top used bit field in the options. (No need to mask after shifting.) 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t STRENGTH_SHIFT = 12; 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Strength options bit mask before shifting. */ 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t STRENGTH_MASK = 0xf000; 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** maxVariable values */ 98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius enum MaxVariable { 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius MAX_VAR_SPACE, 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius MAX_VAR_PUNCT, 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius MAX_VAR_SYMBOL, 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius MAX_VAR_CURRENCY 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius }; 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationSettings() 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : options((UCOL_DEFAULT_STRENGTH << STRENGTH_SHIFT) | 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT)), 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius variableTop(0), 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius reorderTable(NULL), 1101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert minHighNoReorder(0), 1111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert reorderRanges(NULL), reorderRangesLength(0), 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fastLatinOptions(-1) {} 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationSettings(const CollationSettings &other); 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual ~CollationSettings(); 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool operator==(const CollationSettings &other) const; 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline UBool operator!=(const CollationSettings &other) const { 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return !operator==(other); 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t hashCode() const; 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void resetReordering(); 1271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert void aliasReordering(const CollationData &data, const int32_t *codes, int32_t length, 1281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const uint32_t *ranges, int32_t rangesLength, 1291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const uint8_t *table, UErrorCode &errorCode); 1301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert void setReordering(const CollationData &data, const int32_t *codes, int32_t codesLength, 1311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UErrorCode &errorCode); 1321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert void copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode); 1331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 1341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert inline UBool hasReordering() const { return reorderTable != NULL; } 1351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert static UBool reorderTableHasSplitBytes(const uint8_t table[256]); 1361b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert inline uint32_t reorder(uint32_t p) const { 1371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uint8_t b = reorderTable[p >> 24]; 1381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if(b != 0 || p <= Collation::NO_CE_PRIMARY) { 1391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return ((uint32_t)b << 24) | (p & 0xffffff); 1401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 1411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return reorderEx(p); 1421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 1431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode); 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static int32_t getStrength(int32_t options) { 148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return options >> STRENGTH_SHIFT; 149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t getStrength() const { 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return getStrength(options); 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Sets the options bit for an on/off attribute. */ 156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setFlag(int32_t bit, UColAttributeValue value, 157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t defaultOptions, UErrorCode &errorCode); 158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue getFlag(int32_t bit) const { 160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ((options & bit) != 0) ? UCOL_ON : UCOL_OFF; 161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setCaseFirst(UColAttributeValue value, int32_t defaultOptions, UErrorCode &errorCode); 164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue getCaseFirst() const { 166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t option = options & CASE_FIRST_AND_UPPER_MASK; 167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (option == 0) ? UCOL_OFF : 168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (option == CASE_FIRST) ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST; 169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setAlternateHandling(UColAttributeValue value, 172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t defaultOptions, UErrorCode &errorCode); 173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue getAlternateHandling() const { 175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ((options & ALTERNATE_MASK) == 0) ? UCOL_NON_IGNORABLE : UCOL_SHIFTED; 176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode); 179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius MaxVariable getMaxVariable() const { 181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (MaxVariable)((options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT); 182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Include case bits in the tertiary level if caseLevel=off and caseFirst!=off. 186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static inline UBool isTertiaryWithCaseBits(int32_t options) { 188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST; 189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static uint32_t getTertiaryMask(int32_t options) { 191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off. 192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return isTertiaryWithCaseBits(options) ? 193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collation::CASE_AND_TERTIARY_MASK : Collation::ONLY_TERTIARY_MASK; 194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static UBool sortsTertiaryUpperCaseFirst(int32_t options) { 197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // On tertiary level, consider case bits and sort uppercase first 198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // if caseLevel is off and caseFirst==upperFirst. 199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRST_AND_UPPER_MASK; 200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline UBool dontCheckFCD() const { 203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (options & CHECK_FCD) == 0; 204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline UBool hasBackwardSecondary() const { 207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (options & BACKWARD_SECONDARY) != 0; 208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline UBool isNumeric() const { 211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (options & NUMERIC) != 0; 212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** CHECK_FCD etc. */ 215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t options; 216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Variable-top primary weight. */ 217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t variableTop; 2181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert /** 2191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * 256-byte table for reordering permutation of primary lead bytes; NULL if no reordering. 2201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * A 0 entry at a non-zero index means that the primary lead byte is "split" 2211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * (there are different offsets for primaries that share that lead byte) 2221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * and the reordering offset must be determined via the reorderRanges. 2231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert */ 224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint8_t *reorderTable; 2251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert /** Limit of last reordered range. 0 if no reordering or no split bytes. */ 2261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uint32_t minHighNoReorder; 2271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert /** 2281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * Primary-weight ranges for script reordering, 2291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * to be used by reorder(p) for split-reordered primary lead bytes. 2301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * 2311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * Each entry is a (limit, offset) pair. 2321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * The upper 16 bits of the entry are the upper 16 bits of the 2331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * exclusive primary limit of a range. 2341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * Primaries between the previous limit and this one have their lead bytes 2351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * modified by the signed offset (-0xff..+0xff) stored in the lower 16 bits. 2361b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * 2371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * CollationData::makeReorderRanges() writes a full list where the first range 2381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * (at least for terminators and separators) has a 0 offset. 2391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * The last range has a non-zero offset. 2401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * minHighNoReorder is set to the limit of that last range. 2411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * 2421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * In the settings object, the initial ranges before the first split lead byte 2431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * are omitted for efficiency; they are handled by reorder(p) via the reorderTable. 2441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * If there are no split-reordered lead bytes, then no ranges are needed. 2451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert */ 2461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const uint32_t *reorderRanges; 2471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t reorderRangesLength; 248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Array of reorder codes; ignored if reorderCodesLength == 0. */ 249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const int32_t *reorderCodes; 250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Number of reorder codes; 0 if no reordering. */ 251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t reorderCodesLength; 252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Capacity of reorderCodes. 2541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * If 0, then the codes, the ranges, and the table are aliases. 255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Otherwise, this object owns the memory via the reorderCodes pointer; 2561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * the codes, the ranges, and the table are in the same memory block, in that order. 257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t reorderCodesCapacity; 259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Options for CollationFastLatin. Negative if disabled. */ 261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t fastLatinOptions; 262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint16_t fastLatinPrimaries[0x180]; 2631b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 2641b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertprivate: 2651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert void setReorderArrays(const int32_t *codes, int32_t codesLength, 2661b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const uint32_t *ranges, int32_t rangesLength, 2671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const uint8_t *table, UErrorCode &errorCode); 2681b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uint32_t reorderEx(uint32_t p) const; 269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END 272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // __COLLATIONSETTINGS_H__ 275