1/*
2*******************************************************************************
3* Copyright (C) 1996-2011, International Business Machines Corporation and    *
4* others. All Rights Reserved.                                                *
5*******************************************************************************
6*/
7
8/*
9* File coleitr.cpp
10*
11*
12*
13* Created by: Helena Shih
14*
15* Modification History:
16*
17*  Date      Name        Description
18*
19*  6/23/97   helena      Adding comments to make code more readable.
20* 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
21* 12/10/99   aliu        Ported Thai collation support from Java.
22* 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
23* 02/19/01   swquek      Removed CollationElementsIterator() since it is
24*                        private constructor and no calls are made to it
25*/
26
27#include "unicode/utypes.h"
28
29#if !UCONFIG_NO_COLLATION
30
31#include "unicode/coleitr.h"
32#include "unicode/ustring.h"
33#include "ucol_imp.h"
34#include "uassert.h"
35#include "cmemory.h"
36
37
38/* Constants --------------------------------------------------------------- */
39
40U_NAMESPACE_BEGIN
41
42UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
43
44/* CollationElementIterator public constructor/destructor ------------------ */
45
46CollationElementIterator::CollationElementIterator(
47                                         const CollationElementIterator& other)
48                                         : UObject(other), isDataOwned_(TRUE)
49{
50    UErrorCode status = U_ZERO_ERROR;
51    m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
52                                &status);
53
54    *this = other;
55}
56
57CollationElementIterator::~CollationElementIterator()
58{
59    if (isDataOwned_) {
60        ucol_closeElements(m_data_);
61    }
62}
63
64/* CollationElementIterator public methods --------------------------------- */
65
66int32_t CollationElementIterator::getOffset() const
67{
68    return ucol_getOffset(m_data_);
69}
70
71/**
72* Get the ordering priority of the next character in the string.
73* @return the next character's ordering. Returns NULLORDER if an error has
74*         occured or if the end of string has been reached
75*/
76int32_t CollationElementIterator::next(UErrorCode& status)
77{
78    return ucol_next(m_data_, &status);
79}
80
81UBool CollationElementIterator::operator!=(
82                                  const CollationElementIterator& other) const
83{
84    return !(*this == other);
85}
86
87UBool CollationElementIterator::operator==(
88                                    const CollationElementIterator& that) const
89{
90    if (this == &that || m_data_ == that.m_data_) {
91        return TRUE;
92    }
93
94    // option comparison
95    if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
96    {
97        return FALSE;
98    }
99
100    // the constructor and setText always sets a length
101    // and we only compare the string not the contents of the normalization
102    // buffer
103    int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
104    int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
105
106    if (thislength != thatlength) {
107        return FALSE;
108    }
109
110    if (uprv_memcmp(m_data_->iteratordata_.string,
111                    that.m_data_->iteratordata_.string,
112                    thislength * U_SIZEOF_UCHAR) != 0) {
113        return FALSE;
114    }
115    if (getOffset() != that.getOffset()) {
116        return FALSE;
117    }
118
119    // checking normalization buffer
120    if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
121        if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
122            return FALSE;
123        }
124        // both are in the normalization buffer
125        if (m_data_->iteratordata_.pos
126            - m_data_->iteratordata_.writableBuffer.getBuffer()
127            != that.m_data_->iteratordata_.pos
128            - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
129            // not in the same position in the normalization buffer
130            return FALSE;
131        }
132    }
133    else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
134        return FALSE;
135    }
136    // checking ce position
137    return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
138            == (that.m_data_->iteratordata_.CEpos
139                                        - that.m_data_->iteratordata_.CEs);
140}
141
142/**
143* Get the ordering priority of the previous collation element in the string.
144* @param status the error code status.
145* @return the previous element's ordering. Returns NULLORDER if an error has
146*         occured or if the start of string has been reached.
147*/
148int32_t CollationElementIterator::previous(UErrorCode& status)
149{
150    return ucol_previous(m_data_, &status);
151}
152
153/**
154* Resets the cursor to the beginning of the string.
155*/
156void CollationElementIterator::reset()
157{
158    ucol_reset(m_data_);
159}
160
161void CollationElementIterator::setOffset(int32_t newOffset,
162                                         UErrorCode& status)
163{
164    ucol_setOffset(m_data_, newOffset, &status);
165}
166
167/**
168* Sets the source to the new source string.
169*/
170void CollationElementIterator::setText(const UnicodeString& source,
171                                       UErrorCode& status)
172{
173    if (U_FAILURE(status)) {
174        return;
175    }
176
177    int32_t length = source.length();
178    UChar *string = NULL;
179    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
180        uprv_free((UChar *)m_data_->iteratordata_.string);
181    }
182    m_data_->isWritable = TRUE;
183    if (length > 0) {
184        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
185        /* test for NULL */
186        if (string == NULL) {
187            status = U_MEMORY_ALLOCATION_ERROR;
188            return;
189        }
190        u_memcpy(string, source.getBuffer(), length);
191    }
192    else {
193        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
194        /* test for NULL */
195        if (string == NULL) {
196            status = U_MEMORY_ALLOCATION_ERROR;
197            return;
198        }
199        *string = 0;
200    }
201    /* Free offsetBuffer before initializing it. */
202    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
203    uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
204        &m_data_->iteratordata_, &status);
205
206    m_data_->reset_   = TRUE;
207}
208
209// Sets the source to the new character iterator.
210void CollationElementIterator::setText(CharacterIterator& source,
211                                       UErrorCode& status)
212{
213    if (U_FAILURE(status))
214        return;
215
216    int32_t length = source.getLength();
217    UChar *buffer = NULL;
218
219    if (length == 0) {
220        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
221        /* test for NULL */
222        if (buffer == NULL) {
223            status = U_MEMORY_ALLOCATION_ERROR;
224            return;
225        }
226        *buffer = 0;
227    }
228    else {
229        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
230        /* test for NULL */
231        if (buffer == NULL) {
232            status = U_MEMORY_ALLOCATION_ERROR;
233            return;
234        }
235        /*
236        Using this constructor will prevent buffer from being removed when
237        string gets removed
238        */
239        UnicodeString string;
240        source.getText(string);
241        u_memcpy(buffer, string.getBuffer(), length);
242    }
243
244    if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
245        uprv_free((UChar *)m_data_->iteratordata_.string);
246    }
247    m_data_->isWritable = TRUE;
248    /* Free offsetBuffer before initializing it. */
249    ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
250    uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
251        &m_data_->iteratordata_, &status);
252    m_data_->reset_   = TRUE;
253}
254
255int32_t CollationElementIterator::strengthOrder(int32_t order) const
256{
257    UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
258    // Mask off the unwanted differences.
259    if (s == UCOL_PRIMARY) {
260        order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
261    }
262    else if (s == UCOL_SECONDARY) {
263        order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
264    }
265
266    return order;
267}
268
269/* CollationElementIterator private constructors/destructors --------------- */
270
271/**
272* This is the "real" constructor for this class; it constructs an iterator
273* over the source text using the specified collator
274*/
275CollationElementIterator::CollationElementIterator(
276                                               const UnicodeString& sourceText,
277                                               const RuleBasedCollator* order,
278                                               UErrorCode& status)
279                                               : isDataOwned_(TRUE)
280{
281    if (U_FAILURE(status)) {
282        return;
283    }
284
285    int32_t length = sourceText.length();
286    UChar *string = NULL;
287
288    if (length > 0) {
289        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
290        /* test for NULL */
291        if (string == NULL) {
292            status = U_MEMORY_ALLOCATION_ERROR;
293            return;
294        }
295        /*
296        Using this constructor will prevent buffer from being removed when
297        string gets removed
298        */
299        u_memcpy(string, sourceText.getBuffer(), length);
300    }
301    else {
302        string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
303        /* test for NULL */
304        if (string == NULL) {
305            status = U_MEMORY_ALLOCATION_ERROR;
306            return;
307        }
308        *string = 0;
309    }
310    m_data_ = ucol_openElements(order->ucollator, string, length, &status);
311
312    /* Test for buffer overflows */
313    if (U_FAILURE(status)) {
314        return;
315    }
316    m_data_->isWritable = TRUE;
317}
318
319/**
320* This is the "real" constructor for this class; it constructs an iterator over
321* the source text using the specified collator
322*/
323CollationElementIterator::CollationElementIterator(
324                                           const CharacterIterator& sourceText,
325                                           const RuleBasedCollator* order,
326                                           UErrorCode& status)
327                                           : isDataOwned_(TRUE)
328{
329    if (U_FAILURE(status))
330        return;
331
332    // **** should I just drop this test? ****
333    /*
334    if ( sourceText.endIndex() != 0 )
335    {
336        // A CollationElementIterator is really a two-layered beast.
337        // Internally it uses a Normalizer to munge the source text into a form
338        // where all "composed" Unicode characters (such as \u00FC) are split into a
339        // normal character and a combining accent character.
340        // Afterward, CollationElementIterator does its own processing to handle
341        // expanding and contracting collation sequences, ignorables, and so on.
342
343        Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
344                                ? Normalizer::NO_OP : order->getDecomposition();
345
346        text = new Normalizer(sourceText, decomp);
347        if (text == NULL)
348        status = U_MEMORY_ALLOCATION_ERROR;
349    }
350    */
351    int32_t length = sourceText.getLength();
352    UChar *buffer;
353    if (length > 0) {
354        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
355        /* test for NULL */
356        if (buffer == NULL) {
357            status = U_MEMORY_ALLOCATION_ERROR;
358            return;
359        }
360        /*
361        Using this constructor will prevent buffer from being removed when
362        string gets removed
363        */
364        UnicodeString string(buffer, length, length);
365        ((CharacterIterator &)sourceText).getText(string);
366        const UChar *temp = string.getBuffer();
367        u_memcpy(buffer, temp, length);
368    }
369    else {
370        buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
371        /* test for NULL */
372        if (buffer == NULL) {
373            status = U_MEMORY_ALLOCATION_ERROR;
374            return;
375        }
376        *buffer = 0;
377    }
378    m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
379
380    /* Test for buffer overflows */
381    if (U_FAILURE(status)) {
382        return;
383    }
384    m_data_->isWritable = TRUE;
385}
386
387/* CollationElementIterator protected methods ----------------------------- */
388
389const CollationElementIterator& CollationElementIterator::operator=(
390                                         const CollationElementIterator& other)
391{
392    if (this != &other)
393    {
394        UCollationElements *ucolelem      = this->m_data_;
395        UCollationElements *otherucolelem = other.m_data_;
396        collIterate        *coliter       = &(ucolelem->iteratordata_);
397        collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
398        int                length         = 0;
399
400        // checking only UCOL_ITER_HASLEN is not enough here as we may be in
401        // the normalization buffer
402        length = (int)(othercoliter->endp - othercoliter->string);
403
404        ucolelem->reset_         = otherucolelem->reset_;
405        ucolelem->isWritable     = TRUE;
406
407        /* create a duplicate of string */
408        if (length > 0) {
409            coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
410            if(coliter->string != NULL) {
411                uprv_memcpy((UChar *)coliter->string, othercoliter->string,
412                    length * U_SIZEOF_UCHAR);
413            } else { // Error: couldn't allocate memory. No copying should be done
414                length = 0;
415            }
416        }
417        else {
418            coliter->string = NULL;
419        }
420
421        /* start and end of string */
422        coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;
423
424        /* handle writable buffer here */
425
426        if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
427            coliter->writableBuffer = othercoliter->writableBuffer;
428            coliter->writableBuffer.getTerminatedBuffer();
429        }
430
431        /* current position */
432        if (othercoliter->pos >= othercoliter->string &&
433            othercoliter->pos <= othercoliter->endp)
434        {
435            U_ASSERT(coliter->string != NULL);
436            coliter->pos = coliter->string +
437                (othercoliter->pos - othercoliter->string);
438        }
439        else {
440            coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
441                (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
442        }
443
444        /* CE buffer */
445        int32_t CEsize;
446        if (coliter->extendCEs) {
447            uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
448            CEsize = sizeof(othercoliter->extendCEs);
449            if (CEsize > 0) {
450                othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
451                uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
452            }
453            coliter->toReturn = coliter->extendCEs +
454                (othercoliter->toReturn - othercoliter->extendCEs);
455            coliter->CEpos    = coliter->extendCEs + CEsize;
456        } else {
457            CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
458            if (CEsize > 0) {
459                uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
460            }
461            coliter->toReturn = coliter->CEs +
462                (othercoliter->toReturn - othercoliter->CEs);
463            coliter->CEpos    = coliter->CEs + CEsize;
464        }
465
466        if (othercoliter->fcdPosition != NULL) {
467            U_ASSERT(coliter->string != NULL);
468            coliter->fcdPosition = coliter->string +
469                (othercoliter->fcdPosition
470                - othercoliter->string);
471        }
472        else {
473            coliter->fcdPosition = NULL;
474        }
475        coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
476        coliter->origFlags   = othercoliter->origFlags;
477        coliter->coll = othercoliter->coll;
478        this->isDataOwned_ = TRUE;
479    }
480
481    return *this;
482}
483
484U_NAMESPACE_END
485
486#endif /* #if !UCONFIG_NO_COLLATION */
487
488/* eof */
489