10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 1999-2014, International Business Machines 7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others. All Rights Reserved. 8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* file name: collationweights.h 110596faeddefbf198de137d5e893708495ab1584cFredrik Roubert* encoding: UTF-8 12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* tab size: 8 (not used) 13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* indentation:4 14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2001mar08 as ucol_wgt.h 16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/ 18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __COLLATIONWEIGHTS_H__ 20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __COLLATIONWEIGHTS_H__ 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h" 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uobject.h" 27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/** 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Allocates n collation element weights between two exclusive limits. 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Used only internally by the collation tailoring builder. 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API CollationWeights : public UMemory { 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationWeights(); 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static inline int32_t lengthOfWeight(uint32_t weight) { 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((weight&0xffffff)==0) { 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return 1; 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if((weight&0xffff)==0) { 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return 2; 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if((weight&0xff)==0) { 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return 3; 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return 4; 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void initForPrimary(UBool compressible); 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void initForSecondary(); 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void initForTertiary(); 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Determine heuristically 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * what ranges to use for a given number of weights between (excluding) 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * two limits. 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param lowerLimit A collation element weight; the ranges will be filled to cover 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * weights greater than this one. 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param upperLimit A collation element weight; the ranges will be filled to cover 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * weights less than this one. 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @param n The number of collation element weights w necessary such that 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * lowerLimit<w<upperLimit in lexical order. 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return TRUE if it is possible to fit n elements between the limits 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool allocWeights(uint32_t lowerLimit, uint32_t upperLimit, int32_t n); 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Given a set of ranges calculated by allocWeights(), 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * iterate through the weights. 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * The ranges are modified to keep the current iteration state. 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return The next weight in the ranges, or 0xffffffff if there is none left. 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t nextWeight(); 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** @internal */ 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius struct WeightRange { 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t start, end; 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length, count; 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius }; 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate: 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** @return number of usable byte values for byte idx */ 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline int32_t countBytes(int32_t idx) const { 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (int32_t)(maxBytes[idx] - minBytes[idx] + 1); 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t incWeight(uint32_t weight, int32_t length) const; 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t incWeightByOffset(uint32_t weight, int32_t length, int32_t offset) const; 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void lengthenRange(WeightRange &range) const; 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Takes two CE weights and calculates the 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * possible ranges of weights between the two limits, excluding them. 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * For weights with up to 4 bytes there are up to 2*4-1=7 ranges. 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit); 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool allocWeightsInShortRanges(int32_t n, int32_t minLength); 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool allocWeightsInMinLengthRanges(int32_t n, int32_t minLength); 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t middleLength; 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t minBytes[5]; // for byte 1, 2, 3, 4 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t maxBytes[5]; 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius WeightRange ranges[7]; 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t rangeIndex; 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t rangeCount; 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // __COLLATIONWEIGHTS_H__ 114