1/*
2*******************************************************************************
3* Copyright (C) 2013-2014, International Business Machines
4* Corporation and others.  All Rights Reserved.
5*******************************************************************************
6* collationrootelements.cpp
7*
8* created on: 2013mar05
9* created by: Markus W. Scherer
10*/
11
12#include "unicode/utypes.h"
13
14#if !UCONFIG_NO_COLLATION
15
16#include "collation.h"
17#include "collationrootelements.h"
18#include "uassert.h"
19
20U_NAMESPACE_BEGIN
21
22int64_t
23CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const {
24    if(p == 0) { return 0; }
25    U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]);
26    int32_t index = findP(p);
27    uint32_t q = elements[index];
28    uint32_t secTer;
29    if(p == (q & 0xffffff00)) {
30        // p == elements[index] is a root primary. Find the CE before it.
31        // We must not be in a primary range.
32        U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
33        secTer = elements[index - 1];
34        if((secTer & SEC_TER_DELTA_FLAG) == 0) {
35            // Primary CE just before p.
36            p = secTer & 0xffffff00;
37            secTer = Collation::COMMON_SEC_AND_TER_CE;
38        } else {
39            // secTer = last secondary & tertiary for the previous primary
40            index -= 2;
41            for(;;) {
42                p = elements[index];
43                if((p & SEC_TER_DELTA_FLAG) == 0) {
44                    p &= 0xffffff00;
45                    break;
46                }
47                --index;
48            }
49        }
50    } else {
51        // p > elements[index] which is the previous primary.
52        // Find the last secondary & tertiary weights for it.
53        p = q & 0xffffff00;
54        secTer = Collation::COMMON_SEC_AND_TER_CE;
55        for(;;) {
56            q = elements[++index];
57            if((q & SEC_TER_DELTA_FLAG) == 0) {
58                // We must not be in a primary range.
59                U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
60                break;
61            }
62            secTer = q;
63        }
64    }
65    return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
66}
67
68int64_t
69CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const {
70    if(p == 0) { return 0; }
71    int32_t index = findP(p);
72    if(p != (elements[index] & 0xffffff00)) {
73        for(;;) {
74            p = elements[++index];
75            if((p & SEC_TER_DELTA_FLAG) == 0) {
76                // First primary after p. We must not be in a primary range.
77                U_ASSERT((p & PRIMARY_STEP_MASK) == 0);
78                break;
79            }
80        }
81    }
82    // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
83    return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE;
84}
85
86uint32_t
87CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const {
88    int32_t index = findPrimary(p);
89    int32_t step;
90    uint32_t q = elements[index];
91    if(p == (q & 0xffffff00)) {
92        // Found p itself. Return the previous primary.
93        // See if p is at the end of a previous range.
94        step = (int32_t)q & PRIMARY_STEP_MASK;
95        if(step == 0) {
96            // p is not at the end of a range. Look for the previous primary.
97            do {
98                p = elements[--index];
99            } while((p & SEC_TER_DELTA_FLAG) != 0);
100            return p & 0xffffff00;
101        }
102    } else {
103        // p is in a range, and not at the start.
104        uint32_t nextElement = elements[index + 1];
105        U_ASSERT(isEndOfPrimaryRange(nextElement));
106        step = (int32_t)nextElement & PRIMARY_STEP_MASK;
107    }
108    // Return the previous range primary.
109    if((p & 0xffff) == 0) {
110        return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step);
111    } else {
112        return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step);
113    }
114}
115
116uint32_t
117CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const {
118    int32_t index;
119    uint32_t previousSec, sec;
120    if(p == 0) {
121        index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
122        // Gap at the beginning of the secondary CE range.
123        previousSec = 0;
124        sec = elements[index] >> 16;
125    } else {
126        index = findPrimary(p) + 1;
127        previousSec = Collation::BEFORE_WEIGHT16;
128        sec = getFirstSecTerForPrimary(index) >> 16;
129    }
130    U_ASSERT(s >= sec);
131    while(s > sec) {
132        previousSec = sec;
133        U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
134        sec = elements[index++] >> 16;
135    }
136    U_ASSERT(sec == s);
137    return previousSec;
138}
139
140uint32_t
141CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const {
142    U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0);
143    int32_t index;
144    uint32_t previousTer, secTer;
145    if(p == 0) {
146        if(s == 0) {
147            index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
148            // Gap at the beginning of the tertiary CE range.
149            previousTer = 0;
150        } else {
151            index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
152            previousTer = Collation::BEFORE_WEIGHT16;
153        }
154        secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
155    } else {
156        index = findPrimary(p) + 1;
157        previousTer = Collation::BEFORE_WEIGHT16;
158        secTer = getFirstSecTerForPrimary(index);
159    }
160    uint32_t st = (s << 16) | t;
161    while(st > secTer) {
162        if((secTer >> 16) == s) { previousTer = secTer; }
163        U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
164        secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
165    }
166    U_ASSERT(secTer == st);
167    return previousTer & 0xffff;
168}
169
170uint32_t
171CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const {
172    U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1]));
173    uint32_t q = elements[++index];
174    int32_t step;
175    if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) {
176        // Return the next primary in this range.
177        if((p & 0xffff) == 0) {
178            return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step);
179        } else {
180            return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step);
181        }
182    } else {
183        // Return the next primary in the list.
184        while((q & SEC_TER_DELTA_FLAG) != 0) {
185            q = elements[++index];
186        }
187        U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
188        return q;
189    }
190}
191
192uint32_t
193CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const {
194    uint32_t secTer;
195    uint32_t secLimit;
196    if(index == 0) {
197        // primary = 0
198        U_ASSERT(s != 0);
199        index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
200        secTer = elements[index];
201        // Gap at the end of the secondary CE range.
202        secLimit = 0x10000;
203    } else {
204        U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
205        secTer = getFirstSecTerForPrimary(index + 1);
206        // If this is an explicit sec/ter unit, then it will be read once more.
207        // Gap for secondaries of primary CEs.
208        secLimit = getSecondaryBoundary();
209    }
210    for(;;) {
211        uint32_t sec = secTer >> 16;
212        if(sec > s) { return sec; }
213        secTer = elements[++index];
214        if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
215    }
216}
217
218uint32_t
219CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const {
220    uint32_t secTer;
221    uint32_t terLimit;
222    if(index == 0) {
223        // primary = 0
224        if(s == 0) {
225            U_ASSERT(t != 0);
226            index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
227            // Gap at the end of the tertiary CE range.
228            terLimit = 0x4000;
229        } else {
230            index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
231            // Gap for tertiaries of primary/secondary CEs.
232            terLimit = getTertiaryBoundary();
233        }
234        secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
235    } else {
236        U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
237        secTer = getFirstSecTerForPrimary(index + 1);
238        // If this is an explicit sec/ter unit, then it will be read once more.
239        terLimit = getTertiaryBoundary();
240    }
241    uint32_t st = (s << 16) | t;
242    for(;;) {
243        if(secTer > st) {
244            U_ASSERT((secTer >> 16) == s);
245            return secTer & 0xffff;
246        }
247        secTer = elements[++index];
248        // No tertiary greater than t for this primary+secondary.
249        if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
250        secTer &= ~SEC_TER_DELTA_FLAG;
251    }
252}
253
254uint32_t
255CollationRootElements::getFirstSecTerForPrimary(int32_t index) const {
256    uint32_t secTer = elements[index];
257    if((secTer & SEC_TER_DELTA_FLAG) == 0) {
258        // No sec/ter delta.
259        return Collation::COMMON_SEC_AND_TER_CE;
260    }
261    secTer &= ~SEC_TER_DELTA_FLAG;
262    if(secTer > Collation::COMMON_SEC_AND_TER_CE) {
263        // Implied sec/ter.
264        return Collation::COMMON_SEC_AND_TER_CE;
265    }
266    // Explicit sec/ter below common/common.
267    return secTer;
268}
269
270int32_t
271CollationRootElements::findPrimary(uint32_t p) const {
272    // Requirement: p must occur as a root primary.
273    U_ASSERT((p & 0xff) == 0);  // at most a 3-byte primary
274    int32_t index = findP(p);
275    // If p is in a range, then we just assume that p is an actual primary in this range.
276    // (Too cumbersome/expensive to check.)
277    // Otherwise, it must be an exact match.
278    U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00));
279    return index;
280}
281
282int32_t
283CollationRootElements::findP(uint32_t p) const {
284    // p need not occur as a root primary.
285    // For example, it might be a reordering group boundary.
286    U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE);
287    // modified binary search
288    int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX];
289    U_ASSERT(p >= elements[start]);
290    int32_t limit = length - 1;
291    U_ASSERT(elements[limit] >= PRIMARY_SENTINEL);
292    U_ASSERT(p < elements[limit]);
293    while((start + 1) < limit) {
294        // Invariant: elements[start] and elements[limit] are primaries,
295        // and elements[start]<=p<=elements[limit].
296        int32_t i = (start + limit) / 2;
297        uint32_t q = elements[i];
298        if((q & SEC_TER_DELTA_FLAG) != 0) {
299            // Find the next primary.
300            int32_t j = i + 1;
301            for(;;) {
302                if(j == limit) { break; }
303                q = elements[j];
304                if((q & SEC_TER_DELTA_FLAG) == 0) {
305                    i = j;
306                    break;
307                }
308                ++j;
309            }
310            if((q & SEC_TER_DELTA_FLAG) != 0) {
311                // Find the preceding primary.
312                j = i - 1;
313                for(;;) {
314                    if(j == start) { break; }
315                    q = elements[j];
316                    if((q & SEC_TER_DELTA_FLAG) == 0) {
317                        i = j;
318                        break;
319                    }
320                    --j;
321                }
322                if((q & SEC_TER_DELTA_FLAG) != 0) {
323                    // No primary between start and limit.
324                    break;
325                }
326            }
327        }
328        if(p < (q & 0xffffff00)) {  // Reset the "step" bits of a range end primary.
329            limit = i;
330        } else {
331            start = i;
332        }
333    }
334    return start;
335}
336
337U_NAMESPACE_END
338
339#endif  // !UCONFIG_NO_COLLATION
340