10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2012-2014, International Business Machines 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others. All Rights Reserved. 7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationdatabuilder.h 9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2012apr01 11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer 12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/ 13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __COLLATIONDATABUILDER_H__ 15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __COLLATIONDATABUILDER_H__ 16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h" 18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uniset.h" 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/unistr.h" 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uversion.h" 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h" 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h" 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsettings.h" 27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h" 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utrie2.h" 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr32.h" 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr64.h" 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvector.h" 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstruct ConditionalCE32; 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CollationFastLatinBuilder; 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CopyHelper; 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass DataBuilderCollationIterator; 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass UCharsTrieBuilder; 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/** 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Low-level CollationData builder. 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Takes (character, CE) pairs and builds them into runtime data structures. 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Supports characters with context prefixes and contraction suffixes. 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API CollationDataBuilder : public UObject { 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Collation element modifier. Interface class for a modifier 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * that changes a tailoring builder's temporary CEs to final CEs. 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Called for every non-special CE32 and every expansion CE. 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius class CEModifier : public UObject { 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius public: 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual ~CEModifier(); 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Returns a new CE to replace the non-special input CE32, or else Collation::NO_CE. */ 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual int64_t modifyCE32(uint32_t ce32) const = 0; 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Returns a new CE to replace the input CE, or else Collation::NO_CE. */ 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual int64_t modifyCE(int64_t ce) const = 0; 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius }; 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationDataBuilder(UErrorCode &errorCode); 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual ~CollationDataBuilder(); 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void initForTailoring(const CollationData *b, UErrorCode &errorCode); 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UBool isCompressibleLeadByte(uint32_t b) const; 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline UBool isCompressiblePrimary(uint32_t p) const { 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return isCompressibleLeadByte(p >> 24); 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return TRUE if this builder has mappings (e.g., add() has been called) 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool hasMappings() const { return modified; } 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return TRUE if c has CEs in this builder 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool isAssigned(UChar32 c) const; 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return the three-byte primary if c maps to a single such CE and has no context data, 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * otherwise returns 0. 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t getLongPrimaryIfSingleCE(UChar32 c) const; 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return the single CE for c. 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Sets an error code if c does not have a single CE. 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t getSingleCE(UChar32 c, UErrorCode &errorCode) const; 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void add(const UnicodeString &prefix, const UnicodeString &s, 98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const int64_t ces[], int32_t cesLength, 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Encodes the ces as either the returned ce32 by itself, 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * or by storing an expansion, with the returned ce32 referring to that. 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * add(p, s, ces, cesLength) = addCE32(p, s, encodeCEs(ces, cesLength)) 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void addCE32(const UnicodeString &prefix, const UnicodeString &s, 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t ce32, UErrorCode &errorCode); 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Sets three-byte-primary CEs for a range of code points in code point order, 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * if it is worth doing; otherwise no change is made. 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * None of the code points in the range should have complex mappings so far 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * (expansions/contractions/prefixes). 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param start first code point 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param end last code point (inclusive) 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param primary primary weight for 'start' 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param step per-code point primary-weight increment 120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param errorCode ICU in/out error code 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return TRUE if an OFFSET_TAG range was used for start..end 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool maybeSetPrimaryRange(UChar32 start, UChar32 end, 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t primary, int32_t step, 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Sets three-byte-primary CEs for a range of code points in code point order. 129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Sets range values if that is worth doing, or else individual values. 130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * None of the code points in the range should have complex mappings so far 131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * (expansions/contractions/prefixes). 132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param start first code point 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param end last code point (inclusive) 134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param primary primary weight for 'start' 135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param step per-code point primary-weight increment 136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param errorCode ICU in/out error code 137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return the next primary after 'end': start primary incremented by ((end-start)+1)*step 138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t setPrimaryRangeAndReturnNext(UChar32 start, UChar32 end, 140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t primary, int32_t step, 141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Copies all mappings from the src builder, with modifications. 145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * This builder here must not be built yet, and should be empty. 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void copyFrom(const CollationDataBuilder &src, const CEModifier &modifier, 148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void optimize(const UnicodeSet &set, UErrorCode &errorCode); 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void suppressContractions(const UnicodeSet &set, UErrorCode &errorCode); 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void enableFastLatin() { fastLatinEnabled = TRUE; } 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void build(CollationData &data, UErrorCode &errorCode); 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Looks up CEs for s and appends them to the ces array. 158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Does not handle normalization: s should be in FCD form. 159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Does not write completely ignorable CEs. 161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Does not write beyond Collation::MAX_EXPANSION_LENGTH. 162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return incremented cesLength 164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t getCEs(const UnicodeString &s, int64_t ces[], int32_t cesLength); 166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t getCEs(const UnicodeString &prefix, const UnicodeString &s, 167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ces[], int32_t cesLength); 168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected: 170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius friend class CopyHelper; 171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius friend class DataBuilderCollationIterator; 172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t getCE32FromOffsetCE32(UBool fromBase, UChar32 c, uint32_t ce32) const; 174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t addCE(int64_t ce, UErrorCode &errorCode); 176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t addCE32(uint32_t ce32, UErrorCode &errorCode); 177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t addConditionalCE32(const UnicodeString &context, uint32_t ce32, UErrorCode &errorCode); 178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline ConditionalCE32 *getConditionalCE32(int32_t index) const { 180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return static_cast<ConditionalCE32 *>(conditionalCE32s[index]); 181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline ConditionalCE32 *getConditionalCE32ForCE32(uint32_t ce32) const { 183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return getConditionalCE32(Collation::indexFromCE32(ce32)); 184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static uint32_t makeBuilderContextCE32(int32_t index) { 187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return Collation::makeCE32FromTagAndIndex(Collation::BUILDER_DATA_TAG, index); 188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static inline UBool isBuilderContextCE32(uint32_t ce32) { 190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return Collation::hasCE32Tag(ce32, Collation::BUILDER_DATA_TAG); 191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static uint32_t encodeOneCEAsCE32(int64_t ce); 194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t encodeOneCE(int64_t ce, UErrorCode &errorCode); 195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t encodeExpansion(const int64_t ces[], int32_t length, UErrorCode &errorCode); 196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t encodeExpansion32(const int32_t newCE32s[], int32_t length, UErrorCode &errorCode); 197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t copyFromBaseCE32(UChar32 c, uint32_t ce32, UBool withContext, UErrorCode &errorCode); 199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Copies base contractions to a list of ConditionalCE32. 201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Sets cond->next to the index of the first new item 202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * and returns the index of the last new item. 203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t copyContractionsFromBaseCE32(UnicodeString &context, UChar32 c, uint32_t ce32, 205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ConditionalCE32 *cond, UErrorCode &errorCode); 206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool getJamoCE32s(uint32_t jamoCE32s[], UErrorCode &errorCode); 208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setDigitTags(UErrorCode &errorCode); 209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setLeadSurrogates(UErrorCode &errorCode); 210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void buildMappings(CollationData &data, UErrorCode &errorCode); 212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void clearContexts(); 214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void buildContexts(UErrorCode &errorCode); 215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t buildContext(ConditionalCE32 *head, UErrorCode &errorCode); 216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t addContextTrie(uint32_t defaultCE32, UCharsTrieBuilder &trieBuilder, 217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void buildFastLatinTable(CollationData &data, UErrorCode &errorCode); 220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t getCEs(const UnicodeString &s, int32_t start, int64_t ces[], int32_t cesLength); 222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static UChar32 jamoCpFromIndex(int32_t i) { 224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // 0 <= i < CollationData::JAMO_CE32S_LENGTH = 19 + 21 + 27 225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i < Hangul::JAMO_L_COUNT) { return Hangul::JAMO_L_BASE + i; } 226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i -= Hangul::JAMO_L_COUNT; 227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i < Hangul::JAMO_V_COUNT) { return Hangul::JAMO_V_BASE + i; } 228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i -= Hangul::JAMO_V_COUNT; 229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // i < 27 230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return Hangul::JAMO_T_BASE + 1 + i; 231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** @see Collation::BUILDER_DATA_TAG */ 234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const uint32_t IS_BUILDER_JAMO_CE32 = 0x100; 235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const Normalizer2Impl &nfcImpl; 237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *base; 238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationSettings *baseSettings; 239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UTrie2 *trie; 240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector32 ce32s; 241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector64 ce64s; 242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector conditionalCE32s; // vector of ConditionalCE32 243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Characters that have context (prefixes or contraction suffixes). 244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet contextChars; 245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Serialized UCharsTrie structures for finalized contexts. 246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString contexts; 247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet unsafeBackwardSet; 248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool modified; 249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool fastLatinEnabled; 251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationFastLatinBuilder *fastLatinBuilder; 252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius DataBuilderCollationIterator *collIter; 254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END 257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // __COLLATIONDATABUILDER_H__ 260