/*
*******************************************************************************
*
*   Copyright (C) 1998-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* Private implementation header for C collation
*   file name:  ucol_imp.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000dec11
*   created by: Vladimir Weinstein
*
* Modification history
* Date        Name      Comments
* 02/16/2001  synwee    Added UCOL_GETPREVCE for the use in ucoleitr
* 02/27/2001  synwee    Added getMaxExpansion data structure in UCollator
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 03/02/2001 synwee Added UCOL_IMPLICIT_CE 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 03/12/2001 synwee Added pointer start to collIterate. 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifndef UCOL_IMP_H 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_IMP_H 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCA_DATA_TYPE "icu" 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCA_DATA_NAME "ucadata" 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define INVC_DATA_TYPE "icu" 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define INVC_DATA_NAME "invuca" 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Convenience string denoting the Collation data tree 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal ICU 3.0 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define U_ICUDATA_COLL U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll" 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_COLLATION 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef XP_CPLUSPLUS 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles)#include "unicode/normalizer2.h" 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/unistr.h" 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucol.h" 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie.h" 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* This is the internal header file which contains important declarations for 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the collation framework. 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Ready to use collators are stored as binary images. Both UCA and tailorings 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * share the same binary format. Individual files (currently only UCA) have a 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * udata header in front of the image and should be opened using udata_open. 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Tailoring images are currently stored inside resource bundles and are intialized 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * through ucol_open API. 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The following describes the formats for collation binaries 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (UCA & tailorings) and for the inverse UCA table. 
 * Substructures are described in the collation design document at
 * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
 *
 * -------------------------------------------------------------
 *
 * Here is the format of binary collation image.
 *
 * Physical order of structures:
 * - header (UCATableHeader)
 * - options (UColOptionSet)
 * - expansions (CE[])
 * - contractions (UChar[contractionSize] + CE[contractionSize])
 * - serialized UTrie with mappings of code points to CEs
 * - max expansion tables (CE[endExpansionCECount] + uint8_t[endExpansionCECount])
 * - two bit sets for backward processing in strcoll (identical prefixes)
 *   and for backward CE iteration (each set is uint8_t[UCOL_UNSAFECP_TABLE_SIZE])
 * - UCA constants (UCAConstants)
 * - UCA contractions (UChar[contractionUCACombosSize][contractionUCACombosWidth])
 *
 * UCATableHeader fields:
 *
 *      int32_t size;                   - image size in bytes
 *
 *      Offsets to interesting data. All offsets are in bytes.
 *      To get an address, add the offset to the header address and cast properly.
 *      Some offsets are zero if the corresponding structures are empty.
 *
 *      Tailoring binaries that only set options and contain no mappings etc.
 *      will have all offsets 0 except for the options and expansion offsets,
 *      which give the position and length of the options array.
 *
 *      uint32_t options;               - offset to default collator options (UColOptionSet *),
 *                                        a set of 32-bit values. See declaration of UColOptionSet for more details
 *
 *      uint32_t UCAConsts;             - only used (!=0) in UCA image - structure which holds values for indirect positioning and implicit ranges
 *                                        See declaration of UCAConstants structure. This is a set of unsigned 32-bit values used to store
 *                                        important constant values that are defined in the UCA and used for building and runtime.
 *
 *      uint32_t contractionUCACombos;  - only used (!=0) in UCA image - list of UCA contractions. This is a zero terminated array of UChar[contractionUCACombosWidth],
 *                                        containing contractions from the UCA. These are needed in the build process to copy UCA contractions
 *                                        in case the base contraction symbol is tailored.
 *
 *      uint32_t magic;                 - must contain UCOL_HEADER_MAGIC (formatVersion 2.3)
 *
 *      uint32_t mappingPosition;       - offset to UTrie (const uint8_t *mappingPosition). This is a serialized UTrie and should be treated as such.
 *                                        Used as a primary lookup table for collation elements.
 *
 *      uint32_t expansion;             - offset to expansion table (uint32_t *expansion). This is an array of expansion CEs. Never 0.
 *
 *      uint32_t contractionIndex;      - offset to contraction table (UChar *contractionIndex). Used to look up contraction sequences. Contents
 *                                        are aligned with the contents of contractionCEs table. 0 if no contractions.
 *
 *      uint32_t contractionCEs;        - offset to resulting contraction CEs (uint32_t *contractionCEs). When a contraction is resolved in the
 *                                        contractionIndex table, the resulting index is used to look up corresponding CE in this table.
 *                                        0 if no contractions.
 *      uint32_t contractionSize;       - size of contraction table in elements (both Index and CEs).
 *
 *      Tables described below are used for Boyer-Moore searching algorithm - they define the size of longest expansion
 *      and last CEs in expansions.
 *      uint32_t endExpansionCE;        - offset to array of last collation element in expansion (uint32_t *).
 *                                        Never 0.
 *      uint32_t expansionCESize;       - offset to array of maximum expansion sizes (uint8_t *)
 *      int32_t endExpansionCECount;    - size of endExpansionCE. See UCOL_GETMAXEXPANSION
 *                                        for the usage model
 *
 *      These two offsets point to byte tables that are used in the backup heuristics.
 *      uint32_t unsafeCP;              - hash table of unsafe code points (uint8_t *). See ucol_unsafeCP function.
 *      uint32_t contrEndCP;            - hash table of final code points in contractions (uint8_t *). See ucol_contractionEndCP.
 *
 *      int32_t contractionUCACombosSize; - number of UChar[contractionUCACombosWidth] in contractionUCACombos
 *                                        (formatVersion 2.3)
 *      UBool jamoSpecial;              - Jamo special indicator (uint8_t). If TRUE, Jamos are special, so we cannot use simple Hangul decomposition.
 *      UBool isBigEndian;              - endianness of this collation binary (formatVersion 2.3)
 *      uint8_t charSetFamily;          - charset family of this collation binary (formatVersion 2.3)
 *      uint8_t contractionUCACombosWidth; - number of UChars per UCA contraction in contractionUCACombos (formatVersion 2.3)
 *
 *      Various version fields
 *      UVersionInfo version;           - version 4 uint8_t
 *      UVersionInfo UCAVersion;        - version 4 uint8_t
 *      UVersionInfo UCDVersion;        - version 4 uint8_t
 *      UVersionInfo formatVersion;     - version of the format of the collation binary
 *                                        same formatVersion as in ucadata.icu's UDataInfo header
 *                                        (formatVersion 2.3)
 *
 *      uint32_t                        - offset to the reordering code to lead CE byte remapping table
 *      uint32_t                        - offset to the lead CE byte to reordering code mapping table
 *
 *      uint8_t reserved[76];           - currently unused
 *
 * -------------------------------------------------------------
 *
 * Inverse UCA is used for constructing collators from rules. It is always an individual file
 * and always has a UDataInfo header.
 * Here is the structure:
 *
 *      uint32_t byteSize;              - size of inverse UCA image in bytes
 *      uint32_t tableSize;             - length of inverse table (number of uint32_t[3] rows)
 *      uint32_t contsSize;             - size of continuation table (number of UChars in table)
 *
 *      uint32_t table;                 - offset to inverse table (uint32_t *)
 *                                        The inverse table consists of rows of 3 uint32_t values. The first two values are a CE and a possible continuation;
 *                                        the third value is either a code unit (if there is only one code unit for the element) or an index into the
 *                                        continuation table (number of code units combined with an index).
 *                                        If more than one code point has the same CE, the continuation table contains code units separated by FFFF, and the
 *                                        final code unit sequence for a CE is terminated by FFFE.
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * uint32_t conts; - offset to continuation table (uint16_t *). Contains code units that transform to a same CE. 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * UVersionInfo UCAVersion; - version of the UCA, read from file 4 uint8_t 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * uint8_t padding[8]; - padding 8 uint8_t 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Header is followed by the table and continuation table. 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* let us know whether reserved fields are reset to zero or junked */ 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_HEADER_MAGIC 0x20030618 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* UDataInfo for UCA mapping table */ 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* dataFormat="UCol" */ 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCA_DATA_FORMAT_0 ((uint8_t)0x55) 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCA_DATA_FORMAT_1 ((uint8_t)0x43) 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCA_DATA_FORMAT_2 ((uint8_t)0x6f) 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCA_DATA_FORMAT_3 ((uint8_t)0x6c) 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCA_FORMAT_VERSION_0 ((uint8_t)3) 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCA_FORMAT_VERSION_1 ((uint8_t)0) 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles)#define UCA_FORMAT_VERSION_2 ((uint8_t)0) 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCA_FORMAT_VERSION_3 ((uint8_t)0) 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* UDataInfo for inverse UCA table */ 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* dataFormat="InvC" */ 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define INVUCA_DATA_FORMAT_0 ((uint8_t)0x49) 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define INVUCA_DATA_FORMAT_1 ((uint8_t)0x6E) 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define INVUCA_DATA_FORMAT_2 ((uint8_t)0x76) 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define INVUCA_DATA_FORMAT_3 ((uint8_t)0x43) 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define INVUCA_FORMAT_VERSION_0 ((uint8_t)2) 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define INVUCA_FORMAT_VERSION_1 ((uint8_t)1) 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define INVUCA_FORMAT_VERSION_2 ((uint8_t)0) 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define INVUCA_FORMAT_VERSION_3 ((uint8_t)0) 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* This is the size of the stack allocated buffer for sortkey generation and similar operations */ 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* if it is too small, heap allocation will occur.*/ 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* you can change this value if you need memory - it will affect the performance, though, since we're going to malloc */ 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_MAX_BUFFER 128 
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_PRIMARY_MAX_BUFFER 8*UCOL_MAX_BUFFER 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_SECONDARY_MAX_BUFFER UCOL_MAX_BUFFER 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_TERTIARY_MAX_BUFFER UCOL_MAX_BUFFER 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_CASE_MAX_BUFFER UCOL_MAX_BUFFER/4 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_QUAD_MAX_BUFFER 2*UCOL_MAX_BUFFER 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NORMALIZATION_GROWTH 2 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NORMALIZATION_MAX_BUFFER UCOL_MAX_BUFFER*UCOL_NORMALIZATION_GROWTH 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* This writable buffer is used if we encounter Thai and need to reorder the string on the fly */ 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Sometimes we already have a writable buffer (like in case of normalized strings). */ 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)you can change this value to any value >= 4 if you need memory - 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)it will affect the performance, though, since we're going to malloc. 
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Note 3 is the minimum value for Thai collation and 4 is the 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)minimum number for special Jamo 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_WRITABLE_BUFFER_SIZE 256 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* This is the size of the buffer for expansion CE's */ 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* In reality we should not have to deal with expm sequences longer then 16 */ 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* you can change this value if you need memory */ 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* WARNING THIS BUFFER DOES HAVE MALLOC FALLBACK. If you make it too small, you'll get into performance trouble */ 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Reasonable small value is around 10, if you don't do Arabic or other funky collations that have long expansion sequence */ 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* This is the longest expansion sequence we can handle without bombing out */ 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_EXPAND_CE_BUFFER_SIZE 64 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* This is the size to increase the buffer for expansion CE's */ 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_EXPAND_CE_BUFFER_EXTEND_SIZE 64 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Unsafe UChar hash table table size. 
*/ 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* size is 32 bytes for 1 bit for each latin 1 char + some power of two for */ 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* hashing the rest of the chars. Size in bytes */ 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_UNSAFECP_TABLE_SIZE 1056 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* mask value down to "some power of two"-1 */ 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* number of bits, not num of bytes. */ 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_UNSAFECP_TABLE_MASK 0x1fff 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* flags bits for collIterate.flags */ 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* */ 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* NORM - set for incremental normalize of source string */ 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_ITER_NORM 1 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_ITER_HASLEN 2 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* UCOL_ITER_INNORMBUF - set if the "pos" is in */ 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the writable side buffer, handling */ 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* incrementally normalized characters. 
*/ 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_ITER_INNORMBUF 4 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* UCOL_ITER_ALLOCATED - set if this iterator has */ 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* malloced storage to expand a buffer. */ 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_ITER_ALLOCATED 8 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* UCOL_HIRAGANA_Q - note if the codepoint was hiragana */ 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_HIRAGANA_Q 16 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* UCOL_WAS_HIRAGANA - set to TRUE if there was a Hiragana */ 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* otherwise set to false */ 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_WAS_HIRAGANA 32 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* UCOL_USE_ITERATOR - set this if collIterate uses a */ 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* character iterator instead of simply accessing string */ 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* by index */ 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_USE_ITERATOR 64 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FORCE_HAN_IMPLICIT 128 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define NFC_ZERO_CC_BLOCK_LIMIT_ 0x300 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef XP_CPLUSPLUS 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
U_NAMESPACE_BEGIN

/*
 * Per-string iteration state used while producing collation elements (CEs).
 * Holds the source text pointers, the buffer of CEs already produced but not
 * yet returned, and an optional buffer of source offsets.
 */
typedef struct collIterate : public UMemory {
  const UChar *string; /* Original string */
  /* UChar *start;  Pointer to the start of the source string. Either points to string
                    or to writableBuffer */
  const UChar *endp; /* End-of-string pointer. Undefined for NUL-terminated strings */
  const UChar *pos; /* Current position in the string. May point into the original or the writable buffer */

  uint32_t *toReturn; /* The CE in the CEs buffer that should be returned next */
  uint32_t *CEpos; /* The position up to which processed CEs have been stored */

  int32_t *offsetReturn; /* The offset to return, if non-NULL */
  int32_t *offsetStore;  /* The pointer used for storing offsets */
  int32_t offsetRepeatCount;  /* Repeat the stored offset if non-zero */
  int32_t offsetRepeatValue;  /* Offset value to repeat */

  UnicodeString writableBuffer;
  const UChar *fcdPosition; /* Position in the original string to continue the FCD check from. */
  const UCollator *coll;
  const Normalizer2 *nfd;
  uint8_t   flags;
  uint8_t   origFlags;
  uint32_t *extendCEs; /* Used if CEs is not big enough */
  int32_t extendCEsSize; /* Holds the size of the dynamic CEs buffer */
  uint32_t CEs[UCOL_EXPAND_CE_BUFFER_SIZE]; /* This is where CEs are stored */

  int32_t *offsetBuffer;    /* A dynamic buffer to hold offsets */
  int32_t offsetBufferSize; /* The size of the offset buffer */

  UCharIterator *iterator;
  /*int32_t iteratorIndex;*/

  // The offsetBuffer should probably be a UVector32, but helper functions
  // are an improvement over duplicated code.
  void appendOffset(int32_t offset, UErrorCode &errorCode);
} collIterate;

U_NAMESPACE_END

#else

/* C view: collIterate is an opaque type; only pointers to it are used. */
typedef struct collIterate collIterate;

#endif

/*
 * paddedsize(x): x rounded up to the next multiple of 4.
 * The argument is evaluated more than once, so it must be free of side effects.
 */
#define paddedsize(something) ((something)+((((something)%4)!=0)?(4-(something)%4):0))
/* Sum of the padded sizes of UCATableHeader and UColOptionSet. */
#define headersize (paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)))

/*
 * Struct used internally in getSpecial*CE.
 * Its data is similar to that of collIterate.
 */
struct collIterateState {
    const UChar *pos; /* Current position in the string. May point into the original or the writable buffer */
    const UChar *returnPos;
    const UChar *fcdPosition; /* Position in the original string to continue the FCD check from. */
    const UChar *bufferaddress; /* Address of the normalization buffer */
    int32_t  buffersize;
    uint8_t   flags;
    uint8_t   origFlags;
    uint32_t   iteratorIndex;
    int32_t    iteratorMove;
};

/*
 * Initializes the caller-supplied collIterate *s for the given collator and
 * source string. Errors are reported through *status.
 * NOTE(review): the disabled init_collIterate macro later in this header
 * treats sourceLen == -1 as "NUL-terminated"; confirm this function does too.
 */
U_CAPI void U_EXPORT2
uprv_init_collIterate(const UCollator *collator,
                      const UChar *sourceString, int32_t sourceLen,
                      U_NAMESPACE_QUALIFIER collIterate *s, UErrorCode *status);

/* Internal functions for C test code. */
U_CAPI U_NAMESPACE_QUALIFIER collIterate * U_EXPORT2
uprv_new_collIterate(UErrorCode *status);

U_CAPI void U_EXPORT2
uprv_delete_collIterate(U_NAMESPACE_QUALIFIER collIterate *s);

/* @return s->pos == s->endp */
U_CAPI UBool U_EXPORT2
uprv_collIterateAtEnd(U_NAMESPACE_QUALIFIER collIterate *s);

#ifdef XP_CPLUSPLUS

U_NAMESPACE_BEGIN

/* Forward declaration only; the definition is not in this part of the header. */
struct UCollationPCE;
typedef struct UCollationPCE UCollationPCE;

U_NAMESPACE_END

/*
 * Collation element iterator object: wraps a collIterate plus bookkeeping
 * flags and the optional data used for processed CEs.
 */
struct UCollationElements : public U_NAMESPACE_QUALIFIER UMemory
{
  /**
   * Struct wrapper for source data
   */
  U_NAMESPACE_QUALIFIER collIterate iteratordata_;
  /**
   * Indicates if this data has been reset.
   */
  UBool              reset_;
  /**
   * Indicates if the data should be deleted.
   */
  UBool              isWritable;

  /**
   * Data for getNextProcessed, getPreviousProcessed.
   */
  U_NAMESPACE_QUALIFIER UCollationPCE     *pce;
};

#else
/*opaque type*/
struct UCollationElements;
#endif

/* Sets up the pce data of elems (see UCollationElements::pce). */
U_CAPI void U_EXPORT2
uprv_init_pce(const struct UCollationElements *elems);

#define UCOL_LEVELTERMINATOR 1

/* mask off anything but primary order */
#define UCOL_PRIMARYORDERMASK 0xffff0000
/* mask off anything but secondary order */
#define UCOL_SECONDARYORDERMASK 0x0000ff00
/* mask off anything but tertiary order */
#define UCOL_TERTIARYORDERMASK 0x000000ff
/* primary order shift */
#define UCOL_PRIMARYORDERSHIFT 16
/* secondary order shift */
#define UCOL_SECONDARYORDERSHIFT 8

#define UCOL_BYTE_SIZE_MASK 0xFF

#define UCOL_CASE_BYTE_START 0x80
#define UCOL_CASE_SHIFT_START 7

#define UCOL_IGNORABLE 0

/*
 * Get weights from a CE: mask out one level and shift it down to bit 0.
 * The primary weight is the top 16 bits, the secondary weight is bits 8-15,
 * and the tertiary weight is the low 8 bits.
 */
#define UCOL_PRIMARYORDER(order) (((order) & UCOL_PRIMARYORDERMASK)>> UCOL_PRIMARYORDERSHIFT)
#define UCOL_SECONDARYORDER(order) (((order) & UCOL_SECONDARYORDERMASK)>> UCOL_SECONDARYORDERSHIFT)
#define UCOL_TERTIARYORDER(order) ((order) & UCOL_TERTIARYORDERMASK)

/*
 * The range tests below use the unsigned-subtraction idiom:
 * ((uint32_t)ch - first) <= (last - first) is true exactly when
 * first <= ch <= last, because values below `first` wrap around to
 * very large unsigned numbers.
 */

/**
 * Determine if a character is a prevowel, i.e. a vowel that is written
 * before its base consonant but sorts after it.
 * Accepts U+0E40..U+0E44 (Thai) and U+0EC0..U+0EC4 (the matching
 * range in the Lao block).
 */
#define UCOL_ISTHAIPREVOWEL(ch) ((((uint32_t)(ch) - 0xe40) <= (0xe44 - 0xe40)) || \
                                 (((uint32_t)(ch) - 0xec0) <= (0xec4 - 0xec0)))

/**
 * Determine if a character is a Thai base consonant (U+0E01..U+0E2E).
 * The whole expansion is parenthesized so that the macro can be combined
 * safely with other operators, e.g. !UCOL_ISTHAIBASECONSONANT(ch).
 */
#define UCOL_ISTHAIBASECONSONANT(ch) (((uint32_t)(ch) - 0xe01) <= (0xe2e - 0xe01))

/**
 * Determine if a character is a conjoining Jamo in one of the ranges
 * U+1100..U+1112, U+1161..U+1175 or U+11A8..U+11C2.
 */
#define UCOL_ISJAMO(ch) ((((uint32_t)(ch) - 0x1100) <= (0x1112 - 0x1100)) || \
                         (((uint32_t)(ch) - 0x1161) <= (0x1175 - 0x1161)) || \
                         (((uint32_t)(ch) - 0x11A8) <= (0x11C2 - 0x11A8)))

/* Han character ranges */
#define UCOL_FIRST_HAN 0x4E00
#define UCOL_LAST_HAN  0x9FFF
#define UCOL_FIRST_HAN_A 0x3400
#define UCOL_LAST_HAN_A  0x4DBF
/*
 * NOTE(review): FIRST (0xFAE0) is greater than LAST (0xFA2F), so this pair
 * describes an empty range. The values are left untouched here because the
 * intended bounds cannot be determined from this header; confirm against the
 * users of these constants before changing them.
 */
#define UCOL_FIRST_HAN_COMPAT 0xFAE0
#define UCOL_LAST_HAN_COMPAT  0xFA2F
443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Han extension B is in plane 2 */ 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FIRST_HAN_B 0x20000 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_LAST_HAN_B 0x2A6DF 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Hangul range */ 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FIRST_HANGUL 0xAC00 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_LAST_HANGUL 0xD7AF 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Jamo ranges */ 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FIRST_L_JAMO 0x1100 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FIRST_V_JAMO 0x1161 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FIRST_T_JAMO 0x11A8 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_LAST_T_JAMO 0x11F9 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* initializes collIterate structure */ 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* made as macro to speed up things */ 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define init_collIterate(collator, sourceString, sourceLen, s) { \ 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (s)->start = (s)->string = (s)->pos = (UChar *)(sourceString); \ 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (s)->endp = (sourceLen) == -1 ? 
NULL :(UChar *)(sourceString)+(sourceLen); \ 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (s)->CEpos = (s)->toReturn = (s)->CEs; \ 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (s)->isThai = TRUE; \ 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (s)->writableBuffer = (s)->stackWritableBuffer; \ 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (s)->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE; \ 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (s)->coll = (collator); \ 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (s)->fcdPosition = 0; \ 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (s)->flags = 0; \ 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(((collator)->normalizationMode == UCOL_ON)) (s)->flags |= UCOL_ITER_NORM; \ 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Macro to get the maximum size of an expansion ending with the argument ce. 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Used in the Boyer Moore algorithm. 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Note for tailoring, the UCA maxexpansion table has been merged. 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Hence we only have to search the tailored collator only. 
483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param coll const UCollator pointer 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param order last collation element of the expansion sequence 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param result size of the longest expansion with argument collation element 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* as the last element 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_GETMAXEXPANSION(coll, order, result) { \ 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *start; \ 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *limit; \ 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *mid; \ 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start = (coll)->endExpansionCE; \ 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit = (coll)->lastEndExpansionCE; \ 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (start < limit - 1) { \ 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mid = start + ((limit - start) >> 1); \ 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((order) <= *mid) { \ 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit = mid; \ 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } \ 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { \ 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start = mid; \ 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } \ 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } \ 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (*start == order) { \ 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE)); \ 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } \ 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else if (*limit == order) { \ 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = *(coll->expansionCESize + (limit - coll->endExpansionCE)); \ 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } \ 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else if ((order & 0xFFFF) == 0x00C0) { \ 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = 2; \ 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } \ 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { \ 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = 1; \ 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } \ 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC 518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_NAMESPACE_QUALIFIER collIterate *source, UErrorCode *status); 520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE, 523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_NAMESPACE_QUALIFIER collIterate *source, UErrorCode *status); 524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t U_EXPORT2 ucol_getNextCE(const UCollator *coll, 525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
U_NAMESPACE_QUALIFIER collIterate *collationSource, UErrorCode *status); 526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC uint32_t U_EXPORT2 ucol_getPrevCE(const UCollator *coll, 527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_NAMESPACE_QUALIFIER collIterate *collationSource, 528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status); 529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* function used by C++ getCollationKey to prevent restarting the calculation */ 530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC int32_t 531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucol_getSortKeyWithAllocation(const UCollator *coll, 532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *source, int32_t sourceLength, 533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t **pResult, 534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode); 535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* get some memory */ 537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void *ucol_getABuffer(const UCollator *coll, uint32_t size); 538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* worker function for generating sortkeys */ 540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC 541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t U_CALLCONV 542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucol_calcSortKey(const UCollator *coll, 543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *source, 544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t sourceLength, 545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t **result, 
546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t resultLength, 547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool allocatePrimary, 548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status); 549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC 551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t U_CALLCONV 552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucol_calcSortKeySimpleTertiary(const UCollator *coll, 553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *source, 554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t sourceLength, 555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t **result, 556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t resultLength, 557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool allocatePrimary, 558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status); 559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC 561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t 562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucol_getSortKeySize(const UCollator *coll, U_NAMESPACE_QUALIFIER collIterate *s, 563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t currentSize, UColAttributeValue strength, 564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t len); 565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Makes a copy of the Collator's rule data. The format is 567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * that of .col files. 
568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param length returns the length of the data, in bytes. 570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param status the error status 571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return memory, owned by the caller, of size 'length' bytes. 572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal INTERNAL USE ONLY 573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC uint8_t* U_EXPORT2 575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucol_cloneRuleData(const UCollator *coll, int32_t *length, UErrorCode *status); 576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Used to set requested and valid locales on a collator returned by the collator 579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * service. 
580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void U_EXPORT2 582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt, char *actualLocaleToAdopt); 583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_SPECIAL_FLAG 0xF0000000 585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_TAG_SHIFT 24 586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_TAG_MASK 0x0F000000 587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define INIT_EXP_TABLE_SIZE 1024 588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NOT_FOUND 0xF0000000 589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_EXPANSION 0xF1000000 590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_CONTRACTION 0xF2000000 591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_THAI 0xF3000000 592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_UNMARKED 0x03 593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NEW_TERTIARYORDERMASK 0x0000003f 594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Bit mask for primary collation strength. */ 596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_PRIMARYMASK 0xFFFF0000 597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Bit mask for secondary collation strength. 
*/ 599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_SECONDARYMASK 0x0000FF00 600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Bit mask for tertiary collation strength. */ 602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_TERTIARYMASK 0x000000FF 603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Internal. 606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This indicates the last element in a UCollationElements has been consumed. 607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Compare with the UCOL_NULLORDER, UCOL_NULLORDER is returned if error occurs. 608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NO_MORE_CES 0x00010101 610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NO_MORE_CES_PRIMARY 0x00010000 611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NO_MORE_CES_SECONDARY 0x00000100 612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NO_MORE_CES_TERTIARY 0x00000001 613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define isSpecial(CE) ((((CE)&UCOL_SPECIAL_FLAG)>>28)==0xF) 615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_UPPER_CASE 0x80 617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_MIXED_CASE 0x40 618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_LOWER_CASE 0x00 619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
/*
 * Continuation handling: a CE whose low byte has both 0x80 and 0x40 set
 * (UCOL_CONTINUATION_MARKER) is a continuation CE; and-ing a CE with
 * UCOL_REMOVE_CONTINUATION clears those two bits.
 */
#define UCOL_CONTINUATION_MARKER 0xC0
#define UCOL_REMOVE_CONTINUATION 0xFFFFFF3F

/* Predicates on the two top bits of the low byte of a CE. */
#define isContinuation(CE) ((UCOL_CONTINUATION_MARKER & (CE)) == UCOL_CONTINUATION_MARKER)
#define isFlagged(CE)      ((0x80 & (CE)) != 0)
#define isLongPrimary(CE)  ((0xC0 & (CE)) == 0xC0)

/* Tag nibble of a special CE, and tests for two specific tags. */
#define getCETag(CE)      ((UCOL_TAG_MASK & (CE)) >> UCOL_TAG_SHIFT)
#define isContraction(CE) (isSpecial(CE) && (CONTRACTION_TAG == getCETag(CE)))
#define isPrefix(CE)      (isSpecial(CE) && (SPEC_PROC_TAG == getCETag(CE)))

/* Build a special CE: special flag | tag nibble | 24-bit payload. */
#define constructContractCE(tag, CE) (((CE) & 0xFFFFFF) | ((tag) << UCOL_TAG_SHIFT) | UCOL_SPECIAL_FLAG)
#define constructSpecProcCE(CE)      (((CE) & 0xFFFFFF) | (SPEC_PROC_TAG << UCOL_TAG_SHIFT) | UCOL_SPECIAL_FLAG)

/* Payload accessors: 24-bit contraction offset; expansion offset (bits 4-23) and count (bits 0-3). */
#define getContractOffset(CE)  (0xFFFFFF & (CE))
#define getExpansionOffset(CE) ((0x00FFFFF0 & (CE)) >> 4)
#define getExpansionCount(CE)  (0xF & (CE))

/* A CE is ignorable when every bit other than 0x40 is clear. */
#define isCEIgnorable(CE) (0 == (0xFFFFFFBF & (CE)))

637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* StringSearch internal use */ 638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define inNormBuf(coleiter) ((coleiter)->iteratordata_.flags & UCOL_ITER_INNORMBUF) 639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define isFCDPointerNull(coleiter) ((coleiter)->iteratordata_.fcdPosition == NULL) 640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define hasExpansion(coleiter) ((coleiter)->iteratordata_.CEpos != (coleiter)->iteratordata_.CEs) 641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define getExpansionPrefix(coleiter) ((coleiter)->iteratordata_.toReturn - (coleiter)->iteratordata_.CEs) 642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define setExpansionPrefix(coleiter, offset) ((coleiter)->iteratordata_.CEs + offset) 643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define getExpansionSuffix(coleiter) ((coleiter)->iteratordata_.CEpos - (coleiter)->iteratordata_.toReturn) 644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define setExpansionSuffix(coleiter, offset) ((coleiter)->iteratordata_.toReturn = (coleiter)->iteratordata_.CEpos - leftoverces) 645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* This is an enum that lists magic special byte values from the fractional UCA. 
647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * See also http://site.icu-project.org/design/collation/bytes */ 648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* TODO: all the #defines that refer to special byte values from the UCA should be changed to point here */ 649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum { 651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_BYTE_ZERO = 0x00, 652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_BYTE_LEVEL_SEPARATOR = 0x01, 653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_BYTE_SORTKEY_GLUE = 0x02, 654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_BYTE_SHIFT_PREFIX = 0x03, 655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_BYTE_UNSHIFTED_MIN = UCOL_BYTE_SHIFT_PREFIX, 656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_BYTE_FIRST_TAILORED = 0x04, 657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_BYTE_COMMON = 0x05, 658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_BYTE_FIRST_UCA = UCOL_BYTE_COMMON, 659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* TODO: Make the following values dynamic since they change with almost every UCA version. 
*/ 660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_CODAN_PLACEHOLDER = 0x12, 661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_BYTE_FIRST_NON_LATIN_PRIMARY = 0x5B, 662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_BYTE_UNSHIFTED_MAX = 0xFF 663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_RESET_TOP_VALUE 0x9F000303 667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FIRST_PRIMARY_IGNORABLE 0x00008705 668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_LAST_PRIMARY_IGNORABLE 0x0000DD05 669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_LAST_PRIMARY_IGNORABLE_CONT 0x000051C0 670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FIRST_SECONDARY_IGNORABLE 0x00000000 671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_LAST_SECONDARY_IGNORABLE 0x00000500 672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FIRST_TERTIARY_IGNORABLE 0x00000000 673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_LAST_TERTIARY_IGNORABLE 0x00000000 674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FIRST_VARIABLE 0x05070505 675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_LAST_VARIABLE 0x179B0505 676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FIRST_NON_VARIABLE 0x1A200505 677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_LAST_NON_VARIABLE 0x7B41058F 678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NEXT_TOP_VALUE 0xE8960303 
680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NEXT_FIRST_PRIMARY_IGNORABLE 0x00008905 681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NEXT_LAST_PRIMARY_IGNORABLE 0x03000303 682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NEXT_FIRST_SECONDARY_IGNORABLE 0x00008705 683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NEXT_LAST_SECONDARY_IGNORABLE 0x00000500 684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NEXT_FIRST_TERTIARY_IGNORABLE 0x00000000 685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NEXT_LAST_TERTIARY_IGNORABLE 0x00000000 686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NEXT_FIRST_VARIABLE 0x05090505 687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NEXT_LAST_VARIABLE 0x1A200505 688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define PRIMARY_IMPLICIT_MIN 0xE8000000 690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define PRIMARY_IMPLICIT_MAX 0xF0000000 691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* These constants can be changed - sortkey size is affected by them */ 694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_PROPORTION2 0.5 695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_PROPORTION3 0.667 696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* These values come from the UCA */ 698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_COMMON_BOT2 UCOL_BYTE_COMMON 699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define 
UCOL_COMMON_TOP2 0x86u 700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_TOTAL2 (UCOL_COMMON_TOP2-UCOL_COMMON_BOT2-1) 701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FLAG_BIT_MASK_CASE_SW_OFF 0x80 703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_FLAG_BIT_MASK_CASE_SW_ON 0x40 704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_COMMON_TOP3_CASE_SW_OFF 0x85 705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_COMMON_TOP3_CASE_SW_LOWER 0x45 706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_COMMON_TOP3_CASE_SW_UPPER 0xC5 707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* These values come from the UCA */ 709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_COMMON_BOT3 0x05 710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_COMMON_BOTTOM3_CASE_SW_UPPER 0x86; 712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_COMMON_BOTTOM3_CASE_SW_LOWER UCOL_COMMON_BOT3; 713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_TOP_COUNT2 (UCOL_PROPORTION2*UCOL_TOTAL2) 715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_BOT_COUNT2 (UCOL_TOTAL2-UCOL_TOP_COUNT2) 716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_COMMON2 UCOL_COMMON_BOT2 719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_COMMON3_UPPERFIRST 0xC5 720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles)#define UCOL_COMMON3_NORMAL UCOL_COMMON_BOT3 721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_COMMON4 0xFF 723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* constants for case level/case first handling */ 725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* used to instantiate UCollators fields in ucol_updateInternalState */ 726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_CASE_SWITCH 0xC0 727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_NO_CASE_SWITCH 0x00 728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_REMOVE_CASE 0x3F 730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_KEEP_CASE 0xFF 731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_CASE_BIT_MASK 0xC0 733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_TERT_CASE_MASK 0xFF 735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_ENDOFLATINONERANGE 0xFF 737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_LATINONETABLELEN (UCOL_ENDOFLATINONERANGE+50) 738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_BAIL_OUT_CE 0xFF000000 739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef enum { 742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NOT_FOUND_TAG = 0, 
743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) EXPANSION_TAG = 1, /* This code point results in an expansion */ 744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CONTRACTION_TAG = 2, /* Start of a contraction */ 745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) THAI_TAG = 3, /* Thai character - do the reordering */ 746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CHARSET_TAG = 4, /* Charset processing, not yet implemented */ 747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SURROGATE_TAG = 5, /* Lead surrogate that is tailored and doesn't start a contraction */ 748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) HANGUL_SYLLABLE_TAG = 6, /* AC00-D7AF*/ 749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) LEAD_SURROGATE_TAG = 7, /* D800-DBFF*/ 750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TRAIL_SURROGATE_TAG = 8, /* DC00-DFFF*/ 751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CJK_IMPLICIT_TAG = 9, /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/ 752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) IMPLICIT_TAG = 10, 753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SPEC_PROC_TAG = 11, 754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* ICU 2.1 */ 755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) LONG_PRIMARY_TAG = 12, /* This is a three byte primary with starting secondaries and tertiaries */ 756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* It fits in a single 32 bit CE and is used instead of expansion to save */ 757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* space without affecting the performance (hopefully) */ 758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) DIGIT_TAG = 13, /* COllate Digits As Numbers (CODAN) implementation */ 
760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CE_TAGS_COUNT 762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} UColCETags; 763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ***************************************************************************************** 766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * set to zero 767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * NON_CHARACTER FDD0 - FDEF, FFFE, FFFF, 1FFFE, 1FFFF, 2FFFE, 2FFFF,...e.g. **FFFE, **FFFF 768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ****************************************************************************************** 769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct { 772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t variableTopValue; 773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*UColAttributeValue*/ int32_t frenchCollation; 774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*UColAttributeValue*/ int32_t alternateHandling; /* attribute for handling variable elements*/ 775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*UColAttributeValue*/ int32_t caseFirst; /* who goes first, lower case or uppercase */ 776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*UColAttributeValue*/ int32_t caseLevel; /* do we have an extra case level */ 777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*UColAttributeValue*/ int32_t normalizationMode; /* attribute for normalization */ 778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
/*UColAttributeValue*/ int32_t strength; /* attribute for strength */ 779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*UColAttributeValue*/ int32_t hiraganaQ; /* attribute for special Hiragana */ 780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*UColAttributeValue*/ int32_t numericCollation; /* attribute for numeric collation */ 781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t reserved[15]; /* for future use */ 782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} UColOptionSet; 783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct { 785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_FIRST_TERTIARY_IGNORABLE[2]; /*0x00000000*/ 786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_LAST_TERTIARY_IGNORABLE[2]; /*0x00000000*/ 787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_FIRST_PRIMARY_IGNORABLE[2]; /*0x00008705*/ 788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_FIRST_SECONDARY_IGNORABLE[2]; /*0x00000000*/ 789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_LAST_SECONDARY_IGNORABLE[2]; /*0x00000500*/ 790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_LAST_PRIMARY_IGNORABLE[2]; /*0x0000DD05*/ 791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_FIRST_VARIABLE[2]; /*0x05070505*/ 792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_LAST_VARIABLE[2]; /*0x13CF0505*/ 793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_FIRST_NON_VARIABLE[2]; /*0x16200505*/ 794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_LAST_NON_VARIABLE[2]; /*0x767C0505*/ 795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_RESET_TOP_VALUE[2]; /*0x9F000303*/ 
796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_FIRST_IMPLICIT[2]; 797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_LAST_IMPLICIT[2]; 798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_FIRST_TRAILING[2]; 799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_LAST_TRAILING[2]; 800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_NEXT_TOP_VALUE[2]; /*0xE8960303*/ 803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_NEXT_FIRST_PRIMARY_IGNORABLE; /*0x00008905*/ 804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_NEXT_LAST_PRIMARY_IGNORABLE; /*0x03000303*/ 805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_NEXT_FIRST_SECONDARY_IGNORABLE; /*0x00008705*/ 806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_NEXT_LAST_SECONDARY_IGNORABLE; /*0x00000500*/ 807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_NEXT_FIRST_TERTIARY_IGNORABLE; /*0x00000000*/ 808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_NEXT_LAST_TERTIARY_IGNORABLE; /*0x00000000*/ 809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_NEXT_FIRST_VARIABLE; /*0x05090505*/ 810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_NEXT_LAST_VARIABLE; /*0x16200505*/ 811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_PRIMARY_TOP_MIN; 814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_PRIMARY_IMPLICIT_MIN; /*0xE8000000*/ 815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) uint32_t UCA_PRIMARY_IMPLICIT_MAX; /*0xF0000000*/ 816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_PRIMARY_TRAILING_MIN; /*0xE8000000*/ 817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_PRIMARY_TRAILING_MAX; /*0xF0000000*/ 818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_PRIMARY_SPECIAL_MIN; /*0xE8000000*/ 819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCA_PRIMARY_SPECIAL_MAX; /*0xF0000000*/ 820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} UCAConstants; 821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct { 823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t size; 824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* all the offsets are in bytes */ 825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* to get the address add to the header address and cast properly */ 826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t options; /* these are the default options for the collator */ 827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t UCAConsts; /* structure which holds values for indirect positioning and implicit ranges */ 828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t contractionUCACombos; /* this one is needed only for UCA, to copy the appropriate contractions */ 829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t magic; /* magic number - lets us know whether reserved data is reset or junked */ 830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t mappingPosition; /* const uint8_t *mappingPosition; */ 831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t expansion; /* uint32_t *expansion; */ 832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
uint32_t contractionIndex; /* UChar *contractionIndex; */ 833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t contractionCEs; /* uint32_t *contractionCEs; */ 834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t contractionSize; /* needed for various closures */ 835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*int32_t latinOneMapping;*/ /* this is now handled in the trie itself *//* fast track to latin1 chars */ 836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t endExpansionCE; /* array of last collation element in 838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansion */ 839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t expansionCESize; /* array of maximum expansion size 840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) corresponding to the expansion 841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) collation elements with last element 842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) in endExpansionCE*/ 843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t endExpansionCECount; /* size of endExpansionCE */ 844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t unsafeCP; /* hash table of unsafe code points */ 845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t contrEndCP; /* hash table of final code points */ 846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* in contractions. */ 847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t contractionUCACombosSize; /* number of UCA contraction items. 
*/ 849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*Length is contractionUCACombosSize*contractionUCACombosWidth*sizeof(UChar) */ 850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool jamoSpecial; /* is jamoSpecial */ 851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool isBigEndian; /* is this data big endian? from the UDataInfo header*/ 852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t charSetFamily; /* what is the charset family of this data from the UDataInfo header*/ 853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t contractionUCACombosWidth; /* width of UCA combos field */ 854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVersionInfo version; 855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVersionInfo UCAVersion; /* version of the UCA, read from file */ 856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVersionInfo UCDVersion; /* UCD version, obtained by u_getUnicodeVersion */ 857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVersionInfo formatVersion; /* format version from the UDataInfo header */ 858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t scriptToLeadByte; /* offset to script to lead collation byte mapping data */ 859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t leadByteToScript; /* offset to lead collation byte to script mapping data */ 860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t reserved[76]; /* for future use */ 861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} UCATableHeader; 862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define U_UNKNOWN_STATE 0 864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define U_COLLATOR_STATE 0x01 865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define 
U_STATE_LIMIT 0x02 866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* This is the first structure in a state */ 868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* it should be machine independent */ 869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct { 870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this structure is supposed to be readable on all the platforms.*/ 871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* first 2 fields hold the size of the structure in a platform independent way */ 872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t sizeLo; 873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t sizeHi; 874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* identifying the writing platform */ 875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t isBigEndian; 876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* see U_CHARSET_FAMILY values in utypes.h */ 877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t charsetFamily; 878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* version of ICU this state structure comes from */ 879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t icuVersion[4]; 880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* What is the data following this state */ 881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t type; 882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* more stuff to come, keep it on 16 byte boundary */ 883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t reserved[7]; 884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} UStateStruct; 885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
/* This structure follows UStateStruct */
/* and contains data specific for the collators */
/* Endianness needs to be decided before accessing this structure */
/* However, its size IS endianness independent */
typedef struct {
  /* size of this structure */
  uint8_t sizeLo;
  uint8_t sizeHi;
  /* This state is followed by the frozen tailoring */
  uint8_t containsTailoring;
  /* This state is followed by the frozen UCA */
  uint8_t containsUCA;
  /* Version info - the same one */
  uint8_t versionInfo[4];

  /* for charset CEs */
  uint8_t charsetName[32];
  /* this is the resolved locale name*/
  uint8_t locale[32];

  /* Attributes. Open ended */
  /* all the following will be moved to uint32_t because of portability */
  /* variable top value */
  uint32_t variableTopValue;
  /* attribute for handling variable elements*/
  uint32_t /*UColAttributeValue*/ alternateHandling;
  /* how to handle secondary weights */
  uint32_t /*UColAttributeValue*/ frenchCollation;
  /* who goes first, lower case or uppercase */
  uint32_t /*UColAttributeValue*/ caseFirst;
  /* do we have an extra case level */
  uint32_t /*UColAttributeValue*/ caseLevel;
  /* attribute for normalization */
  uint32_t /*UColAttributeValue*/ normalizationMode;
  /* attribute for strength */
  uint32_t /*UColAttributeValue*/ strength;
  /* to be immediately 16 byte aligned: the fields above total 100 bytes, */
  /* so 12 reserved bytes bring the structure to 112 */
  uint8_t reserved[12];
} UColStateStruct;

926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_INV_SIZEMASK 0xFFF00000 927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_INV_OFFSETMASK 0x000FFFFF 928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCOL_INV_SHIFTVALUE 20 929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_BEGIN 931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct { 933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t byteSize; 934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t tableSize; 935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t contsSize; 936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t table; 937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t conts; 938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVersionInfo UCAVersion; /* version of the UCA, read from file */ 939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t padding[8]; 940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} InverseUCATableHeader; 941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef int32_t U_CALLCONV 943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SortKeyGenerator(const UCollator *coll, 944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *source, 945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t sourceLength, 946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t **result, 947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t resultLength, 948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool 
allocatePrimary, 949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status); 950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef void U_CALLCONV 952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ResourceCleaner(UCollator *coll); 953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)struct UCollator { 956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColOptionSet *options; 957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SortKeyGenerator *sortKeyGen; 958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *latinOneCEs; 959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char* actualLocale; 960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char* validLocale; 961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char* requestedLocale; 962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *rules; 963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *ucaRules; 964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UCollator *UCA; 965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UCATableHeader *image; 966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTrie mapping; 967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *latinOneMapping; 968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *expansion; 969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *contractionIndex; 970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *contractionCEs; 971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *endExpansionCE; /* array of last ces in an expansion ce. 973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) corresponds to expansionCESize */ 974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *lastEndExpansionCE;/* pointer to the last element in endExpansionCE */ 975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint8_t *expansionCESize; /* array of the maximum size of a 976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansion ce with the last ce 977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) corresponding to endExpansionCE, 978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) terminated with a null */ 979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint8_t *unsafeCP; /* unsafe code points hashtable */ 980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint8_t *contrEndCP; /* Contraction ending chars hash table */ 981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar minUnsafeCP; /* Smallest unsafe Code Point. 
*/ 982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar minContrEndCP; /* Smallest code point at end of a contraction */ 983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t rulesLength; 985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t latinOneTableLen; 986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t variableTopValue; 988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColAttributeValue frenchCollation; 989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColAttributeValue alternateHandling; /* attribute for handling variable elements*/ 990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColAttributeValue caseFirst; /* who goes first, lower case or uppercase */ 991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColAttributeValue caseLevel; /* do we have an extra case level */ 992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColAttributeValue normalizationMode; /* attribute for normalization */ 993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColAttributeValue strength; /* attribute for strength */ 994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColAttributeValue hiraganaQ; /* attribute for Hiragana */ 995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColAttributeValue numericCollation; 996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool variableTopValueisDefault; 997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool frenchCollationisDefault; 998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool alternateHandlingisDefault; /* attribute for handling variable elements*/ 999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool caseFirstisDefault; /* who goes first, 
lower case or uppercase */ 1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool caseLevelisDefault; /* do we have an extra case level */ 1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool normalizationModeisDefault; /* attribute for normalization */ 1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool strengthisDefault; /* attribute for strength */ 1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool hiraganaQisDefault; /* attribute for Hiragana */ 1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool numericCollationisDefault; 1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool hasRealData; /* some collators have only options, like French, no rules */ 1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* to speed up things, we use the UCA image, but we don't want it */ 1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* to run around */ 1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool freeOnClose; 1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool freeOptionsOnClose; 1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool freeRulesOnClose; 1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool freeImageOnClose; 1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool latinOneUse; 1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool latinOneRegenTable; 1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool latinOneFailed; 1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int8_t tertiaryAddition; /* when switching case, we need to add or subtract different values */ 
1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t caseSwitch; 1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t tertiaryCommon; 1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t tertiaryMask; 1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t tertiaryTop; /* Upper range when compressing */ 1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t tertiaryBottom; /* Upper range when compressing */ 1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t tertiaryTopCount; 1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t tertiaryBottomCount; 1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UVersionInfo dataVersion; /* Data info of UCA table */ 1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t* reorderCodes; 1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t reorderCodesLength; 1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t* leadBytePermutationTable; 1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END 1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* various internal functions */ 1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* do not close UCA returned by ucol_initUCA! 
*/ 1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC 1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UCollator* ucol_initUCA(UErrorCode *status); 1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC 1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, const UCollator *UCA, UErrorCode *status); 1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC 1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCode *status); 1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC 1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UCollator* ucol_open_internal(const char* loc, UErrorCode* status); 1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC 1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ucol_putOptionsToHeader(UCollator* result, UColOptionSet * opts, UErrorCode *status); 1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC 1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ucol_updateInternalState(UCollator *coll, UErrorCode *status); 1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC uint32_t U_EXPORT2 ucol_getFirstCE(const UCollator *coll, UChar u, 
UErrorCode *status); 1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UBool U_EXPORT2 ucol_isTailored(const UCollator *coll, const UChar u, UErrorCode *status); 1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI const InverseUCATableHeader* U_EXPORT2 ucol_initInverseUCA(UErrorCode *status); 1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_initImplicitConstants(UErrorCode *status); 1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t U_EXPORT2 1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_getImplicitFromRaw(UChar32 cp); 1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*U_CFUNC uint32_t U_EXPORT2 1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_getImplicitPrimary(UChar32 cp);*/ 1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UChar32 U_EXPORT2 1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_getRawFromImplicit(uint32_t implicit); 1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UChar32 U_EXPORT2 1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_getRawFromCodePoint(UChar32 i); 1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UChar32 U_EXPORT2 1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_getCodePointFromRaw(UChar32 i); 
1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef const UChar* GetCollationRulesFunction(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status); 1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UCollator* U_EXPORT2 1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucol_openRulesForImport( const UChar *rules, 1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t rulesLength, 1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColAttributeValue normalizationMode, 1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCollationStrength strength, 1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError *parseError, 1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) GetCollationRulesFunction importFunc, 1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void* context, 1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status); 1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 ucol_buildPermutationTable(UCollator *coll, UErrorCode *status); 1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef XP_CPLUSPLUS 1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Test whether a character is potentially "unsafe" for use as a collation 1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * starting point. 
Unsafe chars are those with combining class != 0 plus 1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * those that are the 2nd thru nth character in a contraction sequence. 1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Function is in header file because it's used in both collation and string search, 1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and needs to be inline for performance. 1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static inline UBool ucol_unsafeCP(UChar c, const UCollator *coll) { 1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t hash; 1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t htbyte; 1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c < coll->minUnsafeCP) { 1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) hash = c; 1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) { 1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(UTF_IS_SURROGATE(c)) { 1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Lead or trail surrogate */ 1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* These are always considered unsafe. 
*/ 1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256; 1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) htbyte = coll->unsafeCP[hash>>3]; 1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return ((htbyte >> (hash & 7)) & 1); 1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* XP_CPLUSPLUS */ 1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* The offsetBuffer in collIterate might need to be freed to avoid memory leaks. */ 1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ucol_freeOffsetBuffer(U_NAMESPACE_QUALIFIER collIterate *s); 1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_COLLATION */ 1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1134