// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 1999-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  collationweights.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2001mar08 as ucol_wgt.h
*   created by: Markus W. Scherer
*/

19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __COLLATIONWEIGHTS_H__
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __COLLATIONWEIGHTS_H__
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uobject.h"
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/**
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Allocates n collation element weights between two exclusive limits.
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Used only internally by the collation tailoring builder.
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API CollationWeights : public UMemory {
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationWeights();
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline int32_t lengthOfWeight(uint32_t weight) {
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((weight&0xffffff)==0) {
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return 1;
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if((weight&0xffff)==0) {
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return 2;
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if((weight&0xff)==0) {
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return 3;
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return 4;
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void initForPrimary(UBool compressible);
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void initForSecondary();
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void initForTertiary();
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Determine heuristically
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * what ranges to use for a given number of weights between (excluding)
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * two limits.
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param lowerLimit A collation element weight; the ranges will be filled to cover
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *                   weights greater than this one.
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param upperLimit A collation element weight; the ranges will be filled to cover
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *                   weights less than this one.
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @param n          The number of collation element weights w necessary such that
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *                   lowerLimit<w<upperLimit in lexical order.
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return TRUE if it is possible to fit n elements between the limits
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool allocWeights(uint32_t lowerLimit, uint32_t upperLimit, int32_t n);
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Given a set of ranges calculated by allocWeights(),
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * iterate through the weights.
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * The ranges are modified to keep the current iteration state.
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return The next weight in the ranges, or 0xffffffff if there is none left.
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t nextWeight();
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /** @internal */
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    struct WeightRange {
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t start, end;
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t length, count;
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    };
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /** @return number of usable byte values for byte idx */
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    inline int32_t countBytes(int32_t idx) const {
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return (int32_t)(maxBytes[idx] - minBytes[idx] + 1);
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t incWeight(uint32_t weight, int32_t length) const;
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t incWeightByOffset(uint32_t weight, int32_t length, int32_t offset) const;
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void lengthenRange(WeightRange &range) const;
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Takes two CE weights and calculates the
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * possible ranges of weights between the two limits, excluding them.
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * For weights with up to 4 bytes there are up to 2*4-1=7 ranges.
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool getWeightRanges(uint32_t lowerLimit, uint32_t upperLimit);
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool allocWeightsInShortRanges(int32_t n, int32_t minLength);
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool allocWeightsInMinLengthRanges(int32_t n, int32_t minLength);
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t middleLength;
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t minBytes[5];  // for byte 1, 2, 3, 4
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t maxBytes[5];
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    WeightRange ranges[7];
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t rangeIndex;
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t rangeCount;
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // __COLLATIONWEIGHTS_H__
114