1/*
2******************************************************************************
3*   Copyright (C) 2001-2014, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5******************************************************************************
6*
7* File ucoleitr.cpp
8*
9* Modification History:
10*
11* Date        Name        Description
12* 02/15/2001  synwee      Modified all methods to process its own function
13*                         instead of calling the equivalent c++ api (coleitr.h)
14* 2012-2014   markus      Rewritten in C++ again.
15******************************************************************************/
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_COLLATION
20
21#include "unicode/coleitr.h"
22#include "unicode/tblcoll.h"
23#include "unicode/ucoleitr.h"
24#include "unicode/ustring.h"
25#include "unicode/sortkey.h"
26#include "unicode/uobject.h"
27#include "cmemory.h"
28#include "usrchimp.h"
29
30#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
31
32U_NAMESPACE_USE
33
34#define BUFFER_LENGTH             100
35
36#define DEFAULT_BUFFER_SIZE 16
37#define BUFFER_GROW 8
38
39#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
40
41#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
42
43#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
44
45#define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0])
46
47#define DELETE_ARRAY(array) uprv_free((void *) (array))
48
49struct RCEI
50{
51    uint32_t ce;
52    int32_t  low;
53    int32_t  high;
54};
55
56U_NAMESPACE_BEGIN
57
58struct RCEBuffer
59{
60    RCEI    defaultBuffer[DEFAULT_BUFFER_SIZE];
61    RCEI   *buffer;
62    int32_t bufferIndex;
63    int32_t bufferSize;
64
65    RCEBuffer();
66    ~RCEBuffer();
67
68    UBool empty() const;
69    void  put(uint32_t ce, int32_t ixLow, int32_t ixHigh);
70    const RCEI *get();
71};
72
73RCEBuffer::RCEBuffer()
74{
75    buffer = defaultBuffer;
76    bufferIndex = 0;
77    bufferSize = LENGTHOF(defaultBuffer);
78}
79
80RCEBuffer::~RCEBuffer()
81{
82    if (buffer != defaultBuffer) {
83        DELETE_ARRAY(buffer);
84    }
85}
86
87UBool RCEBuffer::empty() const
88{
89    return bufferIndex <= 0;
90}
91
92void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh)
93{
94    if (bufferIndex >= bufferSize) {
95        RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
96
97        ARRAY_COPY(newBuffer, buffer, bufferSize);
98
99        if (buffer != defaultBuffer) {
100            DELETE_ARRAY(buffer);
101        }
102
103        buffer = newBuffer;
104        bufferSize += BUFFER_GROW;
105    }
106
107    buffer[bufferIndex].ce   = ce;
108    buffer[bufferIndex].low  = ixLow;
109    buffer[bufferIndex].high = ixHigh;
110
111    bufferIndex += 1;
112}
113
114const RCEI *RCEBuffer::get()
115{
116    if (bufferIndex > 0) {
117     return &buffer[--bufferIndex];
118    }
119
120    return NULL;
121}
122
123PCEBuffer::PCEBuffer()
124{
125    buffer = defaultBuffer;
126    bufferIndex = 0;
127    bufferSize = LENGTHOF(defaultBuffer);
128}
129
130PCEBuffer::~PCEBuffer()
131{
132    if (buffer != defaultBuffer) {
133        DELETE_ARRAY(buffer);
134    }
135}
136
137void PCEBuffer::reset()
138{
139    bufferIndex = 0;
140}
141
142UBool PCEBuffer::empty() const
143{
144    return bufferIndex <= 0;
145}
146
147void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh)
148{
149    if (bufferIndex >= bufferSize) {
150        PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
151
152        ARRAY_COPY(newBuffer, buffer, bufferSize);
153
154        if (buffer != defaultBuffer) {
155            DELETE_ARRAY(buffer);
156        }
157
158        buffer = newBuffer;
159        bufferSize += BUFFER_GROW;
160    }
161
162    buffer[bufferIndex].ce   = ce;
163    buffer[bufferIndex].low  = ixLow;
164    buffer[bufferIndex].high = ixHigh;
165
166    bufferIndex += 1;
167}
168
169const PCEI *PCEBuffer::get()
170{
171    if (bufferIndex > 0) {
172     return &buffer[--bufferIndex];
173    }
174
175    return NULL;
176}
177
178UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); }
179
180UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); }
181
182void UCollationPCE::init(UCollationElements *elems) {
183    init(CollationElementIterator::fromUCollationElements(elems));
184}
185
186void UCollationPCE::init(CollationElementIterator *iter)
187{
188    cei = iter;
189    init(*iter->rbc_);
190}
191
192void UCollationPCE::init(const Collator &coll)
193{
194    UErrorCode status = U_ZERO_ERROR;
195
196    strength    = coll.getAttribute(UCOL_STRENGTH, status);
197    toShift     = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
198    isShifted   = FALSE;
199    variableTop = coll.getVariableTop(status);
200}
201
202UCollationPCE::~UCollationPCE()
203{
204    // nothing to do
205}
206
207uint64_t UCollationPCE::processCE(uint32_t ce)
208{
209    uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
210
211    // This is clean, but somewhat slow...
212    // We could apply the mask to ce and then
213    // just get all three orders...
214    switch(strength) {
215    default:
216        tertiary = ucol_tertiaryOrder(ce);
217        /* note fall-through */
218
219    case UCOL_SECONDARY:
220        secondary = ucol_secondaryOrder(ce);
221        /* note fall-through */
222
223    case UCOL_PRIMARY:
224        primary = ucol_primaryOrder(ce);
225    }
226
227    // **** This should probably handle continuations too.  ****
228    // **** That means that we need 24 bits for the primary ****
229    // **** instead of the 16 that we're currently using.   ****
230    // **** So we can lay out the 64 bits as: 24.12.12.16.  ****
231    // **** Another complication with continuations is that ****
232    // **** the *second* CE is marked as a continuation, so ****
233    // **** we always have to peek ahead to know how long   ****
234    // **** the primary is...                               ****
235    if ((toShift && variableTop > ce && primary != 0)
236                || (isShifted && primary == 0)) {
237
238        if (primary == 0) {
239            return UCOL_IGNORABLE;
240        }
241
242        if (strength >= UCOL_QUATERNARY) {
243            quaternary = primary;
244        }
245
246        primary = secondary = tertiary = 0;
247        isShifted = TRUE;
248    } else {
249        if (strength >= UCOL_QUATERNARY) {
250            quaternary = 0xFFFF;
251        }
252
253        isShifted = FALSE;
254    }
255
256    return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
257}
258
259U_NAMESPACE_END
260
261/* public methods ---------------------------------------------------- */
262
263U_CAPI UCollationElements* U_EXPORT2
264ucol_openElements(const UCollator  *coll,
265                  const UChar      *text,
266                        int32_t    textLength,
267                        UErrorCode *status)
268{
269    if (U_FAILURE(*status)) {
270        return NULL;
271    }
272    if (coll == NULL || (text == NULL && textLength != 0)) {
273        *status = U_ILLEGAL_ARGUMENT_ERROR;
274        return NULL;
275    }
276    const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
277    if (rbc == NULL) {
278        *status = U_UNSUPPORTED_ERROR;  // coll is a Collator but not a RuleBasedCollator
279        return NULL;
280    }
281
282    UnicodeString s((UBool)(textLength < 0), text, textLength);
283    CollationElementIterator *cei = rbc->createCollationElementIterator(s);
284    if (cei == NULL) {
285        *status = U_MEMORY_ALLOCATION_ERROR;
286        return NULL;
287    }
288
289    return cei->toUCollationElements();
290}
291
292
293U_CAPI void U_EXPORT2
294ucol_closeElements(UCollationElements *elems)
295{
296    delete CollationElementIterator::fromUCollationElements(elems);
297}
298
299U_CAPI void U_EXPORT2
300ucol_reset(UCollationElements *elems)
301{
302    CollationElementIterator::fromUCollationElements(elems)->reset();
303}
304
305U_CAPI int32_t U_EXPORT2
306ucol_next(UCollationElements *elems,
307          UErrorCode         *status)
308{
309    if (U_FAILURE(*status)) {
310        return UCOL_NULLORDER;
311    }
312
313    return CollationElementIterator::fromUCollationElements(elems)->next(*status);
314}
315
316U_NAMESPACE_BEGIN
317
318int64_t
319UCollationPCE::nextProcessed(
320                   int32_t            *ixLow,
321                   int32_t            *ixHigh,
322                   UErrorCode         *status)
323{
324    int64_t result = UCOL_IGNORABLE;
325    uint32_t low = 0, high = 0;
326
327    if (U_FAILURE(*status)) {
328        return UCOL_PROCESSED_NULLORDER;
329    }
330
331    pceBuffer.reset();
332
333    do {
334        low = cei->getOffset();
335        int32_t ce = cei->next(*status);
336        high = cei->getOffset();
337
338        if (ce == UCOL_NULLORDER) {
339             result = UCOL_PROCESSED_NULLORDER;
340             break;
341        }
342
343        result = processCE((uint32_t)ce);
344    } while (result == UCOL_IGNORABLE);
345
346    if (ixLow != NULL) {
347        *ixLow = low;
348    }
349
350    if (ixHigh != NULL) {
351        *ixHigh = high;
352    }
353
354    return result;
355}
356
357U_NAMESPACE_END
358
359U_CAPI int32_t U_EXPORT2
360ucol_previous(UCollationElements *elems,
361              UErrorCode         *status)
362{
363    if(U_FAILURE(*status)) {
364        return UCOL_NULLORDER;
365    }
366    return CollationElementIterator::fromUCollationElements(elems)->previous(*status);
367}
368
369U_NAMESPACE_BEGIN
370
371int64_t
372UCollationPCE::previousProcessed(
373                   int32_t            *ixLow,
374                   int32_t            *ixHigh,
375                   UErrorCode         *status)
376{
377    int64_t result = UCOL_IGNORABLE;
378    int32_t  low = 0, high = 0;
379
380    if (U_FAILURE(*status)) {
381        return UCOL_PROCESSED_NULLORDER;
382    }
383
384    // pceBuffer.reset();
385
386    while (pceBuffer.empty()) {
387        // buffer raw CEs up to non-ignorable primary
388        RCEBuffer rceb;
389        int32_t ce;
390
391        // **** do we need to reset rceb, or will it always be empty at this point ****
392        do {
393            high = cei->getOffset();
394            ce   = cei->previous(*status);
395            low  = cei->getOffset();
396
397            if (ce == UCOL_NULLORDER) {
398                if (! rceb.empty()) {
399                    break;
400                }
401
402                goto finish;
403            }
404
405            rceb.put((uint32_t)ce, low, high);
406        } while ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce));
407
408        // process the raw CEs
409        while (! rceb.empty()) {
410            const RCEI *rcei = rceb.get();
411
412            result = processCE(rcei->ce);
413
414            if (result != UCOL_IGNORABLE) {
415                pceBuffer.put(result, rcei->low, rcei->high);
416            }
417        }
418    }
419
420finish:
421    if (pceBuffer.empty()) {
422        // **** Is -1 the right value for ixLow, ixHigh? ****
423    	if (ixLow != NULL) {
424    		*ixLow = -1;
425    	}
426
427    	if (ixHigh != NULL) {
428    		*ixHigh = -1
429    		;
430    	}
431        return UCOL_PROCESSED_NULLORDER;
432    }
433
434    const PCEI *pcei = pceBuffer.get();
435
436    if (ixLow != NULL) {
437        *ixLow = pcei->low;
438    }
439
440    if (ixHigh != NULL) {
441        *ixHigh = pcei->high;
442    }
443
444    return pcei->ce;
445}
446
447U_NAMESPACE_END
448
449U_CAPI int32_t U_EXPORT2
450ucol_getMaxExpansion(const UCollationElements *elems,
451                           int32_t            order)
452{
453    return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order);
454
455    // TODO: The old code masked the order according to strength and then did a binary search.
456    // However this was probably at least partially broken because of the following comment.
457    // Still, it might have found a match when this version may not.
458
459    // FIXME: with a masked search, there might be more than one hit,
460    // so we need to look forward and backward from the match to find all
461    // of the hits...
462}
463
464U_CAPI void U_EXPORT2
465ucol_setText(      UCollationElements *elems,
466             const UChar              *text,
467                   int32_t            textLength,
468                   UErrorCode         *status)
469{
470    if (U_FAILURE(*status)) {
471        return;
472    }
473
474    if ((text == NULL && textLength != 0)) {
475        *status = U_ILLEGAL_ARGUMENT_ERROR;
476        return;
477    }
478    UnicodeString s((UBool)(textLength < 0), text, textLength);
479    return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status);
480}
481
482U_CAPI int32_t U_EXPORT2
483ucol_getOffset(const UCollationElements *elems)
484{
485    return CollationElementIterator::fromUCollationElements(elems)->getOffset();
486}
487
488U_CAPI void U_EXPORT2
489ucol_setOffset(UCollationElements    *elems,
490               int32_t           offset,
491               UErrorCode            *status)
492{
493    if (U_FAILURE(*status)) {
494        return;
495    }
496
497    CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status);
498}
499
500U_CAPI int32_t U_EXPORT2
501ucol_primaryOrder (int32_t order)
502{
503    return (order >> 16) & 0xffff;
504}
505
506U_CAPI int32_t U_EXPORT2
507ucol_secondaryOrder (int32_t order)
508{
509    return (order >> 8) & 0xff;
510}
511
512U_CAPI int32_t U_EXPORT2
513ucol_tertiaryOrder (int32_t order)
514{
515    return order & 0xff;
516}
517
518#endif /* #if !UCONFIG_NO_COLLATION */
519