1/*
2*******************************************************************************
3* Copyright (C) 1996-2009, International Business Machines Corporation and    *
4* others. All Rights Reserved.                                                *
5*******************************************************************************
6*/
7
8/*
9* File coleitr.cpp
10*
11*
12*
13* Created by: Helena Shih
14*
15* Modification History:
16*
17*  Date      Name        Description
18*
19*  6/23/97   helena      Adding comments to make code more readable.
20* 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
21* 12/10/99   aliu        Ported Thai collation support from Java.
22* 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
23* 02/19/01   swquek      Removed CollationElementsIterator() since it is
24*                        private constructor and no calls are made to it
25*/
26
27#include "unicode/utypes.h"
28
29#if !UCONFIG_NO_COLLATION
30
31#include "unicode/coleitr.h"
32#include "unicode/ustring.h"
33#include "ucol_imp.h"
34#include "cmemory.h"
35
36
37/* Constants --------------------------------------------------------------- */
38
39U_NAMESPACE_BEGIN
40
41UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
42
43/* CollationElementIterator public constructor/destructor ------------------ */
44
45CollationElementIterator::CollationElementIterator(
46                                         const CollationElementIterator& other)
47                                         : UObject(other), isDataOwned_(TRUE)
48{
49    UErrorCode status = U_ZERO_ERROR;
50    m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
51                                &status);
52
53    *this = other;
54}
55
56CollationElementIterator::~CollationElementIterator()
57{
58    if (isDataOwned_) {
59        ucol_closeElements(m_data_);
60    }
61}
62
63/* CollationElementIterator public methods --------------------------------- */
64
65int32_t CollationElementIterator::getOffset() const
66{
67    return ucol_getOffset(m_data_);
68}
69
70/**
71* Get the ordering priority of the next character in the string.
72* @return the next character's ordering. Returns NULLORDER if an error has
73*         occured or if the end of string has been reached
74*/
75int32_t CollationElementIterator::next(UErrorCode& status)
76{
77    return ucol_next(m_data_, &status);
78}
79
80UBool CollationElementIterator::operator!=(
81                                  const CollationElementIterator& other) const
82{
83    return !(*this == other);
84}
85
86UBool CollationElementIterator::operator==(
87                                    const CollationElementIterator& that) const
88{
89    if (this == &that || m_data_ == that.m_data_) {
90        return TRUE;
91    }
92
93    // option comparison
94    if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
95    {
96        return FALSE;
97    }
98
99    // the constructor and setText always sets a length
100    // and we only compare the string not the contents of the normalization
101    // buffer
102    int thislength = m_data_->iteratordata_.endp -
103                     m_data_->iteratordata_.string;
104    int thatlength = that.m_data_->iteratordata_.endp -
105                     that.m_data_->iteratordata_.string;
106
107    if (thislength != thatlength) {
108        return FALSE;
109    }
110
111    if (uprv_memcmp(m_data_->iteratordata_.string,
112                    that.m_data_->iteratordata_.string,
113                    thislength * U_SIZEOF_UCHAR) != 0) {
114        return FALSE;
115    }
116    if (getOffset() != that.getOffset()) {
117        return FALSE;
118    }
119
120    // checking normalization buffer
121    if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
122        if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
123            return FALSE;
124        }
125        // both are in the normalization buffer
126        if (m_data_->iteratordata_.pos
127            - m_data_->iteratordata_.writableBuffer
128            != that.m_data_->iteratordata_.pos
129            - that.m_data_->iteratordata_.writableBuffer) {
130            // not in the same position in the normalization buffer
131            return FALSE;
132        }
133    }
134    else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
135        return FALSE;
136    }
137    // checking ce position
138    return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
139            == (that.m_data_->iteratordata_.CEpos
140                                        - that.m_data_->iteratordata_.CEs);
141}
142
143/**
144* Get the ordering priority of the previous collation element in the string.
145* @param status the error code status.
146* @return the previous element's ordering. Returns NULLORDER if an error has
147*         occured or if the start of string has been reached.
148*/
149int32_t CollationElementIterator::previous(UErrorCode& status)
150{
151    return ucol_previous(m_data_, &status);
152}
153
154/**
155* Resets the cursor to the beginning of the string.
156*/
157void CollationElementIterator::reset()
158{
159    ucol_reset(m_data_);
160}
161
162void CollationElementIterator::setOffset(int32_t newOffset,
163                                         UErrorCode& status)
164{
165    ucol_setOffset(m_data_, newOffset, &status);
166}
167
168/**
169* Sets the source to the new source string.
170*/
171void CollationElementIterator::setText(const UnicodeString& source,
172                                       UErrorCode& status)
173{
174    if (U_FAILURE(status)) {
175        return;
176    }
177
178    int32_t length = source.length();
179    UChar *string = NULL;
180    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
181        uprv_free(m_data_->iteratordata_.string);
182    }
183    m_data_->isWritable = TRUE;
184    if (length > 0) {
185        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
186        /* test for NULL */
187        if (string == NULL) {
188            status = U_MEMORY_ALLOCATION_ERROR;
189            return;
190        }
191        u_memcpy(string, source.getBuffer(), length);
192    }
193    else {
194        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
195        /* test for NULL */
196        if (string == NULL) {
197            status = U_MEMORY_ALLOCATION_ERROR;
198            return;
199        }
200        *string = 0;
201    }
202    /* Free offsetBuffer before initializing it. */
203    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
204    uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
205        &m_data_->iteratordata_);
206
207    m_data_->reset_   = TRUE;
208}
209
210// Sets the source to the new character iterator.
211void CollationElementIterator::setText(CharacterIterator& source,
212                                       UErrorCode& status)
213{
214    if (U_FAILURE(status))
215        return;
216
217    int32_t length = source.getLength();
218    UChar *buffer = NULL;
219
220    if (length == 0) {
221        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
222        /* test for NULL */
223        if (buffer == NULL) {
224            status = U_MEMORY_ALLOCATION_ERROR;
225            return;
226        }
227        *buffer = 0;
228    }
229    else {
230        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
231        /* test for NULL */
232        if (buffer == NULL) {
233            status = U_MEMORY_ALLOCATION_ERROR;
234            return;
235        }
236        /*
237        Using this constructor will prevent buffer from being removed when
238        string gets removed
239        */
240        UnicodeString string;
241        source.getText(string);
242        u_memcpy(buffer, string.getBuffer(), length);
243    }
244
245    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
246        uprv_free(m_data_->iteratordata_.string);
247    }
248    m_data_->isWritable = TRUE;
249    /* Free offsetBuffer before initializing it. */
250    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
251    uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
252        &m_data_->iteratordata_);
253    m_data_->reset_   = TRUE;
254}
255
256int32_t CollationElementIterator::strengthOrder(int32_t order) const
257{
258    UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
259    // Mask off the unwanted differences.
260    if (s == UCOL_PRIMARY) {
261        order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
262    }
263    else if (s == UCOL_SECONDARY) {
264        order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
265    }
266
267    return order;
268}
269
270/* CollationElementIterator private constructors/destructors --------------- */
271
272/**
273* This is the "real" constructor for this class; it constructs an iterator
274* over the source text using the specified collator
275*/
276CollationElementIterator::CollationElementIterator(
277                                               const UnicodeString& sourceText,
278                                               const RuleBasedCollator* order,
279                                               UErrorCode& status)
280                                               : isDataOwned_(TRUE)
281{
282    if (U_FAILURE(status)) {
283        return;
284    }
285
286    int32_t length = sourceText.length();
287    UChar *string = NULL;
288
289    if (length > 0) {
290        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
291        /* test for NULL */
292        if (string == NULL) {
293            status = U_MEMORY_ALLOCATION_ERROR;
294            return;
295        }
296        /*
297        Using this constructor will prevent buffer from being removed when
298        string gets removed
299        */
300        u_memcpy(string, sourceText.getBuffer(), length);
301    }
302    else {
303        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
304        /* test for NULL */
305        if (string == NULL) {
306            status = U_MEMORY_ALLOCATION_ERROR;
307            return;
308        }
309        *string = 0;
310    }
311    m_data_ = ucol_openElements(order->ucollator, string, length, &status);
312
313    /* Test for buffer overflows */
314    if (U_FAILURE(status)) {
315        return;
316    }
317    m_data_->isWritable = TRUE;
318}
319
320/**
321* This is the "real" constructor for this class; it constructs an iterator over
322* the source text using the specified collator
323*/
324CollationElementIterator::CollationElementIterator(
325                                           const CharacterIterator& sourceText,
326                                           const RuleBasedCollator* order,
327                                           UErrorCode& status)
328                                           : isDataOwned_(TRUE)
329{
330    if (U_FAILURE(status))
331        return;
332
333    // **** should I just drop this test? ****
334    /*
335    if ( sourceText.endIndex() != 0 )
336    {
337        // A CollationElementIterator is really a two-layered beast.
338        // Internally it uses a Normalizer to munge the source text into a form
339        // where all "composed" Unicode characters (such as \u00FC) are split into a
340        // normal character and a combining accent character.
341        // Afterward, CollationElementIterator does its own processing to handle
342        // expanding and contracting collation sequences, ignorables, and so on.
343
344        Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
345                                ? Normalizer::NO_OP : order->getDecomposition();
346
347        text = new Normalizer(sourceText, decomp);
348        if (text == NULL)
349        status = U_MEMORY_ALLOCATION_ERROR;
350    }
351    */
352    int32_t length = sourceText.getLength();
353    UChar *buffer;
354    if (length > 0) {
355        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
356        /* test for NULL */
357        if (buffer == NULL) {
358            status = U_MEMORY_ALLOCATION_ERROR;
359            return;
360        }
361        /*
362        Using this constructor will prevent buffer from being removed when
363        string gets removed
364        */
365        UnicodeString string(buffer, length, length);
366        ((CharacterIterator &)sourceText).getText(string);
367        const UChar *temp = string.getBuffer();
368        u_memcpy(buffer, temp, length);
369    }
370    else {
371        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
372        /* test for NULL */
373        if (buffer == NULL) {
374            status = U_MEMORY_ALLOCATION_ERROR;
375            return;
376        }
377        *buffer = 0;
378    }
379    m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
380
381    /* Test for buffer overflows */
382    if (U_FAILURE(status)) {
383        return;
384    }
385    m_data_->isWritable = TRUE;
386}
387
388/* CollationElementIterator protected methods ----------------------------- */
389
390const CollationElementIterator& CollationElementIterator::operator=(
391                                         const CollationElementIterator& other)
392{
393    if (this != &other)
394    {
395        UCollationElements *ucolelem      = this->m_data_;
396        UCollationElements *otherucolelem = other.m_data_;
397        collIterate        *coliter       = &(ucolelem->iteratordata_);
398        collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
399        int                length         = 0;
400
401        // checking only UCOL_ITER_HASLEN is not enough here as we may be in
402        // the normalization buffer
403        length = othercoliter->endp - othercoliter->string;
404
405        ucolelem->reset_         = otherucolelem->reset_;
406        ucolelem->isWritable     = TRUE;
407
408        /* create a duplicate of string */
409        if (length > 0) {
410            coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
411            if(coliter->string != NULL) {
412                uprv_memcpy(coliter->string, othercoliter->string,
413                    length * U_SIZEOF_UCHAR);
414            } else { // Error: couldn't allocate memory. No copying should be done
415                length = 0;
416            }
417        }
418        else {
419            coliter->string = NULL;
420        }
421
422        /* start and end of string */
423        coliter->endp = coliter->string + length;
424
425        /* handle writable buffer here */
426
427        if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
428            uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1;
429            if (wlength < coliter->writableBufSize) {
430                uprv_memcpy(coliter->stackWritableBuffer,
431                    othercoliter->stackWritableBuffer,
432                    wlength * U_SIZEOF_UCHAR);
433            }
434            else {
435                if (coliter->writableBuffer != coliter->stackWritableBuffer) {
436                    uprv_free(coliter->writableBuffer);
437                }
438                coliter->writableBuffer = (UChar *)uprv_malloc(
439                    wlength * U_SIZEOF_UCHAR);
440                if(coliter->writableBuffer != NULL) {
441                    uprv_memcpy(coliter->writableBuffer,
442                        othercoliter->writableBuffer,
443                        wlength * U_SIZEOF_UCHAR);
444                    coliter->writableBufSize = wlength;
445                } else { // Error: couldn't allocate memory for writableBuffer
446                    coliter->writableBufSize = 0;
447                }
448            }
449        }
450
451        /* current position */
452        if (othercoliter->pos >= othercoliter->string &&
453            othercoliter->pos <= othercoliter->endp)
454        {
455            coliter->pos = coliter->string +
456                (othercoliter->pos - othercoliter->string);
457        }
458        else if (coliter->writableBuffer != NULL) {
459            coliter->pos = coliter->writableBuffer +
460                (othercoliter->pos - othercoliter->writableBuffer);
461        }
462        else {
463            // Error: couldn't allocate memory for writableBuffer
464            coliter->pos = NULL;
465        }
466
467        /* CE buffer */
468        int32_t CEsize;
469        if (coliter->extendCEs) {
470            uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
471            CEsize = sizeof(othercoliter->extendCEs);
472            if (CEsize > 0) {
473                othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
474                uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
475            }
476            coliter->toReturn = coliter->extendCEs +
477                (othercoliter->toReturn - othercoliter->extendCEs);
478            coliter->CEpos    = coliter->extendCEs + CEsize;
479        } else {
480            CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
481            if (CEsize > 0) {
482                uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
483            }
484            coliter->toReturn = coliter->CEs +
485                (othercoliter->toReturn - othercoliter->CEs);
486            coliter->CEpos    = coliter->CEs + CEsize;
487        }
488
489        if (othercoliter->fcdPosition != NULL) {
490            coliter->fcdPosition = coliter->string +
491                (othercoliter->fcdPosition
492                - othercoliter->string);
493        }
494        else {
495            coliter->fcdPosition = NULL;
496        }
497        coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
498        coliter->origFlags   = othercoliter->origFlags;
499        coliter->coll = othercoliter->coll;
500        this->isDataOwned_ = TRUE;
501    }
502
503    return *this;
504}
505
506U_NAMESPACE_END
507
508#endif /* #if !UCONFIG_NO_COLLATION */
509
510/* eof */
511