1fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* 2fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2012-2014, International Business Machines 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others. All Rights Reserved. 5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationdatabuilder.h 7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2012apr01 9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/ 11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __COLLATIONDATABUILDER_H__ 13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __COLLATIONDATABUILDER_H__ 14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h" 16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uniset.h" 20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/unistr.h" 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uversion.h" 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h" 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h" 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsettings.h" 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h" 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utrie2.h" 27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr32.h" 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr64.h" 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvector.h" 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstruct ConditionalCE32; 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CollationFastLatinBuilder; 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CopyHelper; 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass DataBuilderCollationIterator; 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass UCharsTrieBuilder; 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/** 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Low-level CollationData builder. 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Takes (character, CE) pairs and builds them into runtime data structures. 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Supports characters with context prefixes and contraction suffixes. 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API CollationDataBuilder : public UObject { 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Collation element modifier. Interface class for a modifier 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * that changes a tailoring builder's temporary CEs to final CEs. 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Called for every non-special CE32 and every expansion CE. 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius class CEModifier : public UObject { 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius public: 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual ~CEModifier(); 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Returns a new CE to replace the non-special input CE32, or else Collation::NO_CE. */ 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual int64_t modifyCE32(uint32_t ce32) const = 0; 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Returns a new CE to replace the input CE, or else Collation::NO_CE. */ 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual int64_t modifyCE(int64_t ce) const = 0; 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius }; 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationDataBuilder(UErrorCode &errorCode); 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual ~CollationDataBuilder(); 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void initForTailoring(const CollationData *b, UErrorCode &errorCode); 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UBool isCompressibleLeadByte(uint32_t b) const; 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline UBool isCompressiblePrimary(uint32_t p) const { 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return isCompressibleLeadByte(p >> 24); 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return TRUE if this builder has mappings (e.g., add() has been called) 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool hasMappings() const { return modified; } 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return TRUE if c has CEs in this builder 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool isAssigned(UChar32 c) const; 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return the three-byte primary if c maps to a single such CE and has no context data, 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * otherwise returns 0. 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t getLongPrimaryIfSingleCE(UChar32 c) const; 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return the single CE for c. 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Sets an error code if c does not have a single CE. 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t getSingleCE(UChar32 c, UErrorCode &errorCode) const; 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void add(const UnicodeString &prefix, const UnicodeString &s, 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const int64_t ces[], int32_t cesLength, 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Encodes the ces as either the returned ce32 by itself, 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * or by storing an expansion, with the returned ce32 referring to that. 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * add(p, s, ces, cesLength) = addCE32(p, s, encodeCEs(ces, cesLength)) 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void addCE32(const UnicodeString &prefix, const UnicodeString &s, 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t ce32, UErrorCode &errorCode); 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Sets three-byte-primary CEs for a range of code points in code point order, 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * if it is worth doing; otherwise no change is made. 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * None of the code points in the range should have complex mappings so far 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * (expansions/contractions/prefixes). 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param start first code point 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param end last code point (inclusive) 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param primary primary weight for 'start' 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param step per-code point primary-weight increment 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param errorCode ICU in/out error code 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return TRUE if an OFFSET_TAG range was used for start..end 120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool maybeSetPrimaryRange(UChar32 start, UChar32 end, 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t primary, int32_t step, 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Sets three-byte-primary CEs for a range of code points in code point order. 127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Sets range values if that is worth doing, or else individual values. 128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * None of the code points in the range should have complex mappings so far 129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * (expansions/contractions/prefixes). 130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param start first code point 131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param end last code point (inclusive) 132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param primary primary weight for 'start' 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param step per-code point primary-weight increment 134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param errorCode ICU in/out error code 135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return the next primary after 'end': start primary incremented by ((end-start)+1)*step 136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t setPrimaryRangeAndReturnNext(UChar32 start, UChar32 end, 138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t primary, int32_t step, 139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Copies all mappings from the src builder, with modifications. 143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * This builder here must not be built yet, and should be empty. 144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void copyFrom(const CollationDataBuilder &src, const CEModifier &modifier, 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void optimize(const UnicodeSet &set, UErrorCode &errorCode); 149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void suppressContractions(const UnicodeSet &set, UErrorCode &errorCode); 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void enableFastLatin() { fastLatinEnabled = TRUE; } 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void build(CollationData &data, UErrorCode &errorCode); 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Looks up CEs for s and appends them to the ces array. 156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Does not handle normalization: s should be in FCD form. 157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Does not write completely ignorable CEs. 159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Does not write beyond Collation::MAX_EXPANSION_LENGTH. 160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return incremented cesLength 162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t getCEs(const UnicodeString &s, int64_t ces[], int32_t cesLength); 164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t getCEs(const UnicodeString &prefix, const UnicodeString &s, 165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ces[], int32_t cesLength); 166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected: 168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius friend class CopyHelper; 169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius friend class DataBuilderCollationIterator; 170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t getCE32FromOffsetCE32(UBool fromBase, UChar32 c, uint32_t ce32) const; 172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t addCE(int64_t ce, UErrorCode &errorCode); 174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t addCE32(uint32_t ce32, UErrorCode &errorCode); 175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t addConditionalCE32(const UnicodeString &context, uint32_t ce32, UErrorCode &errorCode); 176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline ConditionalCE32 *getConditionalCE32(int32_t index) const { 178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return static_cast<ConditionalCE32 *>(conditionalCE32s[index]); 179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline ConditionalCE32 *getConditionalCE32ForCE32(uint32_t ce32) const { 181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return getConditionalCE32(Collation::indexFromCE32(ce32)); 182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static uint32_t makeBuilderContextCE32(int32_t index) { 185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return Collation::makeCE32FromTagAndIndex(Collation::BUILDER_DATA_TAG, index); 186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static inline UBool isBuilderContextCE32(uint32_t ce32) { 188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return Collation::hasCE32Tag(ce32, Collation::BUILDER_DATA_TAG); 189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static uint32_t encodeOneCEAsCE32(int64_t ce); 192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t encodeOneCE(int64_t ce, UErrorCode &errorCode); 193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t encodeExpansion(const int64_t ces[], int32_t length, UErrorCode &errorCode); 194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t encodeExpansion32(const int32_t newCE32s[], int32_t length, UErrorCode &errorCode); 195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t copyFromBaseCE32(UChar32 c, uint32_t ce32, UBool withContext, UErrorCode &errorCode); 197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Copies base contractions to a list of ConditionalCE32. 199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Sets cond->next to the index of the first new item 200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * and returns the index of the last new item. 201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t copyContractionsFromBaseCE32(UnicodeString &context, UChar32 c, uint32_t ce32, 203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ConditionalCE32 *cond, UErrorCode &errorCode); 204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool getJamoCE32s(uint32_t jamoCE32s[], UErrorCode &errorCode); 206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setDigitTags(UErrorCode &errorCode); 207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setLeadSurrogates(UErrorCode &errorCode); 208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void buildMappings(CollationData &data, UErrorCode &errorCode); 210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void clearContexts(); 212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void buildContexts(UErrorCode &errorCode); 213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t buildContext(ConditionalCE32 *head, UErrorCode &errorCode); 214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t addContextTrie(uint32_t defaultCE32, UCharsTrieBuilder &trieBuilder, 215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void buildFastLatinTable(CollationData &data, UErrorCode &errorCode); 218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t getCEs(const UnicodeString &s, int32_t start, int64_t ces[], int32_t cesLength); 220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static UChar32 jamoCpFromIndex(int32_t i) { 222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // 0 <= i < CollationData::JAMO_CE32S_LENGTH = 19 + 21 + 27 223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i < Hangul::JAMO_L_COUNT) { return Hangul::JAMO_L_BASE + i; } 224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i -= Hangul::JAMO_L_COUNT; 225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i < Hangul::JAMO_V_COUNT) { return Hangul::JAMO_V_BASE + i; } 226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i -= Hangul::JAMO_V_COUNT; 227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // i < 27 228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return Hangul::JAMO_T_BASE + 1 + i; 229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** @see Collation::BUILDER_DATA_TAG */ 232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const uint32_t IS_BUILDER_JAMO_CE32 = 0x100; 233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const Normalizer2Impl &nfcImpl; 235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *base; 236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationSettings *baseSettings; 237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UTrie2 *trie; 238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector32 ce32s; 239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector64 ce64s; 240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector conditionalCE32s; // vector of ConditionalCE32 241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Characters that have context (prefixes or contraction suffixes). 242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet contextChars; 243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Serialized UCharsTrie structures for finalized contexts. 244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString contexts; 245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet unsafeBackwardSet; 246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool modified; 247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool fastLatinEnabled; 249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationFastLatinBuilder *fastLatinBuilder; 250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius DataBuilderCollationIterator *collIter; 252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END 255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // __COLLATIONDATABUILDER_H__ 258