1/*
2*******************************************************************************
3* Copyright (C) 1996-2010, International Business Machines Corporation and    *
4* others. All Rights Reserved.                                                *
5*******************************************************************************
6*/
7
8/*
9* File coleitr.cpp
10*
11*
12*
13* Created by: Helena Shih
14*
15* Modification History:
16*
17*  Date      Name        Description
18*
19*  6/23/97   helena      Adding comments to make code more readable.
20* 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
21* 12/10/99   aliu        Ported Thai collation support from Java.
22* 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
23* 02/19/01   swquek      Removed CollationElementsIterator() since it is
24*                        private constructor and no calls are made to it
25*/
26
27#include "unicode/utypes.h"
28
29#if !UCONFIG_NO_COLLATION
30
31#include "unicode/coleitr.h"
32#include "unicode/ustring.h"
33#include "ucol_imp.h"
34#include "cmemory.h"
35
36
37/* Constants --------------------------------------------------------------- */
38
39U_NAMESPACE_BEGIN
40
41UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
42
43/* CollationElementIterator public constructor/destructor ------------------ */
44
45CollationElementIterator::CollationElementIterator(
46                                         const CollationElementIterator& other)
47                                         : UObject(other), isDataOwned_(TRUE)
48{
49    UErrorCode status = U_ZERO_ERROR;
50    m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
51                                &status);
52
53    *this = other;
54}
55
56CollationElementIterator::~CollationElementIterator()
57{
58    if (isDataOwned_) {
59        ucol_closeElements(m_data_);
60    }
61}
62
63/* CollationElementIterator public methods --------------------------------- */
64
65int32_t CollationElementIterator::getOffset() const
66{
67    return ucol_getOffset(m_data_);
68}
69
70/**
71* Get the ordering priority of the next character in the string.
72* @return the next character's ordering. Returns NULLORDER if an error has
73*         occured or if the end of string has been reached
74*/
75int32_t CollationElementIterator::next(UErrorCode& status)
76{
77    return ucol_next(m_data_, &status);
78}
79
80UBool CollationElementIterator::operator!=(
81                                  const CollationElementIterator& other) const
82{
83    return !(*this == other);
84}
85
86UBool CollationElementIterator::operator==(
87                                    const CollationElementIterator& that) const
88{
89    if (this == &that || m_data_ == that.m_data_) {
90        return TRUE;
91    }
92
93    // option comparison
94    if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
95    {
96        return FALSE;
97    }
98
99    // the constructor and setText always sets a length
100    // and we only compare the string not the contents of the normalization
101    // buffer
102    int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
103    int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
104
105    if (thislength != thatlength) {
106        return FALSE;
107    }
108
109    if (uprv_memcmp(m_data_->iteratordata_.string,
110                    that.m_data_->iteratordata_.string,
111                    thislength * U_SIZEOF_UCHAR) != 0) {
112        return FALSE;
113    }
114    if (getOffset() != that.getOffset()) {
115        return FALSE;
116    }
117
118    // checking normalization buffer
119    if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
120        if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
121            return FALSE;
122        }
123        // both are in the normalization buffer
124        if (m_data_->iteratordata_.pos
125            - m_data_->iteratordata_.writableBuffer.getBuffer()
126            != that.m_data_->iteratordata_.pos
127            - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
128            // not in the same position in the normalization buffer
129            return FALSE;
130        }
131    }
132    else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
133        return FALSE;
134    }
135    // checking ce position
136    return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
137            == (that.m_data_->iteratordata_.CEpos
138                                        - that.m_data_->iteratordata_.CEs);
139}
140
141/**
142* Get the ordering priority of the previous collation element in the string.
143* @param status the error code status.
144* @return the previous element's ordering. Returns NULLORDER if an error has
145*         occured or if the start of string has been reached.
146*/
147int32_t CollationElementIterator::previous(UErrorCode& status)
148{
149    return ucol_previous(m_data_, &status);
150}
151
152/**
153* Resets the cursor to the beginning of the string.
154*/
155void CollationElementIterator::reset()
156{
157    ucol_reset(m_data_);
158}
159
160void CollationElementIterator::setOffset(int32_t newOffset,
161                                         UErrorCode& status)
162{
163    ucol_setOffset(m_data_, newOffset, &status);
164}
165
166/**
167* Sets the source to the new source string.
168*/
169void CollationElementIterator::setText(const UnicodeString& source,
170                                       UErrorCode& status)
171{
172    if (U_FAILURE(status)) {
173        return;
174    }
175
176    int32_t length = source.length();
177    UChar *string = NULL;
178    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
179        uprv_free((UChar *)m_data_->iteratordata_.string);
180    }
181    m_data_->isWritable = TRUE;
182    if (length > 0) {
183        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
184        /* test for NULL */
185        if (string == NULL) {
186            status = U_MEMORY_ALLOCATION_ERROR;
187            return;
188        }
189        u_memcpy(string, source.getBuffer(), length);
190    }
191    else {
192        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
193        /* test for NULL */
194        if (string == NULL) {
195            status = U_MEMORY_ALLOCATION_ERROR;
196            return;
197        }
198        *string = 0;
199    }
200    /* Free offsetBuffer before initializing it. */
201    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
202    uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
203        &m_data_->iteratordata_, &status);
204
205    m_data_->reset_   = TRUE;
206}
207
208// Sets the source to the new character iterator.
209void CollationElementIterator::setText(CharacterIterator& source,
210                                       UErrorCode& status)
211{
212    if (U_FAILURE(status))
213        return;
214
215    int32_t length = source.getLength();
216    UChar *buffer = NULL;
217
218    if (length == 0) {
219        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
220        /* test for NULL */
221        if (buffer == NULL) {
222            status = U_MEMORY_ALLOCATION_ERROR;
223            return;
224        }
225        *buffer = 0;
226    }
227    else {
228        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
229        /* test for NULL */
230        if (buffer == NULL) {
231            status = U_MEMORY_ALLOCATION_ERROR;
232            return;
233        }
234        /*
235        Using this constructor will prevent buffer from being removed when
236        string gets removed
237        */
238        UnicodeString string;
239        source.getText(string);
240        u_memcpy(buffer, string.getBuffer(), length);
241    }
242
243    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
244        uprv_free((UChar *)m_data_->iteratordata_.string);
245    }
246    m_data_->isWritable = TRUE;
247    /* Free offsetBuffer before initializing it. */
248    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
249    uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
250        &m_data_->iteratordata_, &status);
251    m_data_->reset_   = TRUE;
252}
253
254int32_t CollationElementIterator::strengthOrder(int32_t order) const
255{
256    UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
257    // Mask off the unwanted differences.
258    if (s == UCOL_PRIMARY) {
259        order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
260    }
261    else if (s == UCOL_SECONDARY) {
262        order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
263    }
264
265    return order;
266}
267
268/* CollationElementIterator private constructors/destructors --------------- */
269
270/**
271* This is the "real" constructor for this class; it constructs an iterator
272* over the source text using the specified collator
273*/
274CollationElementIterator::CollationElementIterator(
275                                               const UnicodeString& sourceText,
276                                               const RuleBasedCollator* order,
277                                               UErrorCode& status)
278                                               : isDataOwned_(TRUE)
279{
280    if (U_FAILURE(status)) {
281        return;
282    }
283
284    int32_t length = sourceText.length();
285    UChar *string = NULL;
286
287    if (length > 0) {
288        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
289        /* test for NULL */
290        if (string == NULL) {
291            status = U_MEMORY_ALLOCATION_ERROR;
292            return;
293        }
294        /*
295        Using this constructor will prevent buffer from being removed when
296        string gets removed
297        */
298        u_memcpy(string, sourceText.getBuffer(), length);
299    }
300    else {
301        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
302        /* test for NULL */
303        if (string == NULL) {
304            status = U_MEMORY_ALLOCATION_ERROR;
305            return;
306        }
307        *string = 0;
308    }
309    m_data_ = ucol_openElements(order->ucollator, string, length, &status);
310
311    /* Test for buffer overflows */
312    if (U_FAILURE(status)) {
313        return;
314    }
315    m_data_->isWritable = TRUE;
316}
317
318/**
319* This is the "real" constructor for this class; it constructs an iterator over
320* the source text using the specified collator
321*/
322CollationElementIterator::CollationElementIterator(
323                                           const CharacterIterator& sourceText,
324                                           const RuleBasedCollator* order,
325                                           UErrorCode& status)
326                                           : isDataOwned_(TRUE)
327{
328    if (U_FAILURE(status))
329        return;
330
331    // **** should I just drop this test? ****
332    /*
333    if ( sourceText.endIndex() != 0 )
334    {
335        // A CollationElementIterator is really a two-layered beast.
336        // Internally it uses a Normalizer to munge the source text into a form
337        // where all "composed" Unicode characters (such as \u00FC) are split into a
338        // normal character and a combining accent character.
339        // Afterward, CollationElementIterator does its own processing to handle
340        // expanding and contracting collation sequences, ignorables, and so on.
341
342        Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
343                                ? Normalizer::NO_OP : order->getDecomposition();
344
345        text = new Normalizer(sourceText, decomp);
346        if (text == NULL)
347        status = U_MEMORY_ALLOCATION_ERROR;
348    }
349    */
350    int32_t length = sourceText.getLength();
351    UChar *buffer;
352    if (length > 0) {
353        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
354        /* test for NULL */
355        if (buffer == NULL) {
356            status = U_MEMORY_ALLOCATION_ERROR;
357            return;
358        }
359        /*
360        Using this constructor will prevent buffer from being removed when
361        string gets removed
362        */
363        UnicodeString string(buffer, length, length);
364        ((CharacterIterator &)sourceText).getText(string);
365        const UChar *temp = string.getBuffer();
366        u_memcpy(buffer, temp, length);
367    }
368    else {
369        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
370        /* test for NULL */
371        if (buffer == NULL) {
372            status = U_MEMORY_ALLOCATION_ERROR;
373            return;
374        }
375        *buffer = 0;
376    }
377    m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
378
379    /* Test for buffer overflows */
380    if (U_FAILURE(status)) {
381        return;
382    }
383    m_data_->isWritable = TRUE;
384}
385
386/* CollationElementIterator protected methods ----------------------------- */
387
388const CollationElementIterator& CollationElementIterator::operator=(
389                                         const CollationElementIterator& other)
390{
391    if (this != &other)
392    {
393        UCollationElements *ucolelem      = this->m_data_;
394        UCollationElements *otherucolelem = other.m_data_;
395        collIterate        *coliter       = &(ucolelem->iteratordata_);
396        collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
397        int                length         = 0;
398
399        // checking only UCOL_ITER_HASLEN is not enough here as we may be in
400        // the normalization buffer
401        length = (int)(othercoliter->endp - othercoliter->string);
402
403        ucolelem->reset_         = otherucolelem->reset_;
404        ucolelem->isWritable     = TRUE;
405
406        /* create a duplicate of string */
407        if (length > 0) {
408            coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
409            if(coliter->string != NULL) {
410                uprv_memcpy((UChar *)coliter->string, othercoliter->string,
411                    length * U_SIZEOF_UCHAR);
412            } else { // Error: couldn't allocate memory. No copying should be done
413                length = 0;
414            }
415        }
416        else {
417            coliter->string = NULL;
418        }
419
420        /* start and end of string */
421        coliter->endp = coliter->string + length;
422
423        /* handle writable buffer here */
424
425        if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
426            coliter->writableBuffer = othercoliter->writableBuffer;
427            coliter->writableBuffer.getTerminatedBuffer();
428        }
429
430        /* current position */
431        if (othercoliter->pos >= othercoliter->string &&
432            othercoliter->pos <= othercoliter->endp)
433        {
434            coliter->pos = coliter->string +
435                (othercoliter->pos - othercoliter->string);
436        }
437        else {
438            coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
439                (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
440        }
441
442        /* CE buffer */
443        int32_t CEsize;
444        if (coliter->extendCEs) {
445            uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
446            CEsize = sizeof(othercoliter->extendCEs);
447            if (CEsize > 0) {
448                othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
449                uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
450            }
451            coliter->toReturn = coliter->extendCEs +
452                (othercoliter->toReturn - othercoliter->extendCEs);
453            coliter->CEpos    = coliter->extendCEs + CEsize;
454        } else {
455            CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
456            if (CEsize > 0) {
457                uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
458            }
459            coliter->toReturn = coliter->CEs +
460                (othercoliter->toReturn - othercoliter->CEs);
461            coliter->CEpos    = coliter->CEs + CEsize;
462        }
463
464        if (othercoliter->fcdPosition != NULL) {
465            coliter->fcdPosition = coliter->string +
466                (othercoliter->fcdPosition
467                - othercoliter->string);
468        }
469        else {
470            coliter->fcdPosition = NULL;
471        }
472        coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
473        coliter->origFlags   = othercoliter->origFlags;
474        coliter->coll = othercoliter->coll;
475        this->isDataOwned_ = TRUE;
476    }
477
478    return *this;
479}
480
481U_NAMESPACE_END
482
483#endif /* #if !UCONFIG_NO_COLLATION */
484
485/* eof */
486