1/*
2*******************************************************************************
3* Copyright (C) 2012-2014, International Business Machines
4* Corporation and others.  All Rights Reserved.
5*******************************************************************************
6* collationkeys.h
7*
8* created on: 2012sep02
9* created by: Markus W. Scherer
10*/
11
12#ifndef __COLLATIONKEYS_H__
13#define __COLLATIONKEYS_H__
14
15#include "unicode/utypes.h"
16
17#if !UCONFIG_NO_COLLATION
18
19#include "unicode/bytestream.h"
20#include "unicode/ucol.h"
21#include "charstr.h"
22#include "collation.h"
23
24U_NAMESPACE_BEGIN
25
26class CollationIterator;
27struct CollationDataReader;
28struct CollationSettings;
29
30class SortKeyByteSink : public ByteSink {
31public:
32    SortKeyByteSink(char *dest, int32_t destCapacity)
33            : buffer_(dest), capacity_(destCapacity),
34              appended_(0), ignore_(0) {}
35    virtual ~SortKeyByteSink();
36
37    void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }
38
39    virtual void Append(const char *bytes, int32_t n);
40    void Append(uint32_t b) {
41        if (ignore_ > 0) {
42            --ignore_;
43        } else {
44            if (appended_ < capacity_ || Resize(1, appended_)) {
45                buffer_[appended_] = (char)b;
46            }
47            ++appended_;
48        }
49    }
50    virtual char *GetAppendBuffer(int32_t min_capacity,
51                                  int32_t desired_capacity_hint,
52                                  char *scratch, int32_t scratch_capacity,
53                                  int32_t *result_capacity);
54    int32_t NumberOfBytesAppended() const { return appended_; }
55
56    /**
57     * @return how many bytes can be appended (including ignored ones)
58     *         without reallocation
59     */
60    int32_t GetRemainingCapacity() const {
61        // Either ignore_ or appended_ should be 0.
62        return ignore_ + capacity_ - appended_;
63    }
64
65    UBool Overflowed() const { return appended_ > capacity_; }
66    /** @return FALSE if memory allocation failed */
67    UBool IsOk() const { return buffer_ != NULL; }
68
69protected:
70    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
71    virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;
72
73    void SetNotOk() {
74        buffer_ = NULL;
75        capacity_ = 0;
76    }
77
78    char *buffer_;
79    int32_t capacity_;
80    int32_t appended_;
81    int32_t ignore_;
82
83private:
84    SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
85    SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
86};
87
88class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
89public:
90    class LevelCallback : public UMemory {
91    public:
92        virtual ~LevelCallback();
93        /**
94         * @param level The next level about to be written to the ByteSink.
95         * @return TRUE if the level is to be written
96         *         (the base class implementation always returns TRUE)
97         */
98        virtual UBool needToWrite(Collation::Level level);
99    };
100
101    /**
102     * Writes the sort key bytes for minLevel up to the iterator data's strength.
103     * Optionally writes the case level.
104     * Stops writing levels when callback.needToWrite(level) returns FALSE.
105     * Separates levels with the LEVEL_SEPARATOR_BYTE
106     * but does not write a TERMINATOR_BYTE.
107     */
108    static void writeSortKeyUpToQuaternary(CollationIterator &iter,
109                                           const UBool *compressibleBytes,
110                                           const CollationSettings &settings,
111                                           SortKeyByteSink &sink,
112                                           Collation::Level minLevel, LevelCallback &callback,
113                                           UBool preflight, UErrorCode &errorCode);
114private:
115    friend struct CollationDataReader;
116
117    CollationKeys();  // no instantiation
118
119    // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
120    static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
121    static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
122    static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
123    static const int32_t SEC_COMMON_MAX_COUNT = 0x21;
124
125    // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
126    static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
127    static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
128    static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
129    static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
130
131    // Case level, upperFirst: Compress up to 13 common weights as 3..15.
132    static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
133    static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
134    static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
135
136    // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
137    static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
138    static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
139    static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
140    static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;
141
142    // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
143    static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
144    static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
145    static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
146    static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
147
148    // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
149    static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
150    static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
151    static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
152    static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
153
154    // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
155    static const uint32_t QUAT_COMMON_LOW = 0x1c;
156    static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
157    static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
158    static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;
159    // Primary weights shifted to quaternary level must be encoded with
160    // a lead byte below the common-weight compression range.
161    static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1;  // 0x1b
162};
163
164U_NAMESPACE_END
165
166#endif  // !UCONFIG_NO_COLLATION
167#endif  // __COLLATIONKEYS_H__
168