1/*
2**********************************************************************
3*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
4**********************************************************************
5*   Date        Name        Description
6*  03/22/2000   helena      Creation.
7**********************************************************************
8*/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/brkiter.h"
15#include "unicode/schriter.h"
16#include "unicode/search.h"
17#include "usrchimp.h"
18#include "cmemory.h"
19
20// public constructors and destructors -----------------------------------
21U_NAMESPACE_BEGIN
22
23SearchIterator::SearchIterator(const SearchIterator &other)
24    : UObject(other)
25{
26    m_breakiterator_            = other.m_breakiterator_;
27    m_text_                     = other.m_text_;
28    m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));
29    m_search_->breakIter        = other.m_search_->breakIter;
30    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
31    m_search_->isOverlap        = other.m_search_->isOverlap;
32    m_search_->matchedIndex     = other.m_search_->matchedIndex;
33    m_search_->matchedLength    = other.m_search_->matchedLength;
34    m_search_->text             = other.m_search_->text;
35    m_search_->textLength       = other.m_search_->textLength;
36}
37
38SearchIterator::~SearchIterator()
39{
40    if (m_search_ != NULL) {
41        uprv_free(m_search_);
42    }
43}
44
45// public get and set methods ----------------------------------------
46
47void SearchIterator::setAttribute(USearchAttribute       attribute,
48                                  USearchAttributeValue  value,
49                                  UErrorCode            &status)
50{
51    if (U_SUCCESS(status)) {
52        switch (attribute)
53        {
54        case USEARCH_OVERLAP :
55            m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
56            break;
57        case USEARCH_CANONICAL_MATCH :
58            m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
59            break;
60        default:
61            status = U_ILLEGAL_ARGUMENT_ERROR;
62        }
63    }
64    if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
65        status = U_ILLEGAL_ARGUMENT_ERROR;
66    }
67}
68
69USearchAttributeValue SearchIterator::getAttribute(
70                                          USearchAttribute  attribute) const
71{
72    switch (attribute) {
73    case USEARCH_OVERLAP :
74        return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
75    case USEARCH_CANONICAL_MATCH :
76        return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
77                                                                USEARCH_OFF);
78    default :
79        return USEARCH_DEFAULT;
80    }
81}
82
83int32_t SearchIterator::getMatchedStart() const
84{
85    return m_search_->matchedIndex;
86}
87
88int32_t SearchIterator::getMatchedLength() const
89{
90    return m_search_->matchedLength;
91}
92
93void SearchIterator::getMatchedText(UnicodeString &result) const
94{
95    int32_t matchedindex  = m_search_->matchedIndex;
96    int32_t     matchedlength = m_search_->matchedLength;
97    if (matchedindex != USEARCH_DONE && matchedlength != 0) {
98        result.setTo(m_search_->text + matchedindex, matchedlength);
99    }
100    else {
101        result.remove();
102    }
103}
104
105void SearchIterator::setBreakIterator(BreakIterator *breakiter,
106                                      UErrorCode &status)
107{
108    if (U_SUCCESS(status)) {
109#if 0
110        m_search_->breakIter = NULL;
111        // the c++ breakiterator may not make use of ubreakiterator.
112        // so we'll have to keep track of it ourselves.
113#else
114        // Well, gee... the Constructors that take a BreakIterator
115        // all cast the BreakIterator to a UBreakIterator and
116        // pass it to the corresponding usearch_openFromXXX
117        // routine, so there's no reason not to do this.
118        //
119        // Besides, a UBreakIterator is a BreakIterator, so
120        // any subclass of BreakIterator should work fine here...
121        m_search_->breakIter = (UBreakIterator *) breakiter;
122#endif
123
124        m_breakiterator_ = breakiter;
125    }
126}
127
128const BreakIterator * SearchIterator::getBreakIterator(void) const
129{
130    return m_breakiterator_;
131}
132
133void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
134{
135    if (U_SUCCESS(status)) {
136        if (text.length() == 0) {
137            status = U_ILLEGAL_ARGUMENT_ERROR;
138        }
139        else {
140            m_text_        = text;
141            m_search_->text = m_text_.getBuffer();
142            m_search_->textLength = m_text_.length();
143        }
144    }
145}
146
147void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
148{
149    if (U_SUCCESS(status)) {
150        text.getText(m_text_);
151        setText(m_text_, status);
152    }
153}
154
155const UnicodeString & SearchIterator::getText(void) const
156{
157    return m_text_;
158}
159
160// operator overloading ----------------------------------------------
161
162UBool SearchIterator::operator==(const SearchIterator &that) const
163{
164    if (this == &that) {
165        return TRUE;
166    }
167    return (m_breakiterator_            == that.m_breakiterator_ &&
168            m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
169            m_search_->isOverlap        == that.m_search_->isOverlap &&
170            m_search_->matchedIndex     == that.m_search_->matchedIndex &&
171            m_search_->matchedLength    == that.m_search_->matchedLength &&
172            m_search_->textLength       == that.m_search_->textLength &&
173            getOffset() == that.getOffset() &&
174            (uprv_memcmp(m_search_->text, that.m_search_->text,
175                              m_search_->textLength * sizeof(UChar)) == 0));
176}
177
178// public methods ----------------------------------------------------
179
180int32_t SearchIterator::first(UErrorCode &status)
181{
182    if (U_FAILURE(status)) {
183        return USEARCH_DONE;
184    }
185    setOffset(0, status);
186    return handleNext(0, status);
187}
188
189int32_t SearchIterator::following(int32_t position,
190                                      UErrorCode &status)
191{
192    if (U_FAILURE(status)) {
193        return USEARCH_DONE;
194    }
195    setOffset(position, status);
196    return handleNext(position, status);
197}
198
199int32_t SearchIterator::last(UErrorCode &status)
200{
201    if (U_FAILURE(status)) {
202        return USEARCH_DONE;
203    }
204    setOffset(m_search_->textLength, status);
205    return handlePrev(m_search_->textLength, status);
206}
207
208int32_t SearchIterator::preceding(int32_t position,
209                                      UErrorCode &status)
210{
211    if (U_FAILURE(status)) {
212        return USEARCH_DONE;
213    }
214    setOffset(position, status);
215    return handlePrev(position, status);
216}
217
218int32_t SearchIterator::next(UErrorCode &status)
219{
220    if (U_SUCCESS(status)) {
221        int32_t offset = getOffset();
222        int32_t matchindex  = m_search_->matchedIndex;
223        int32_t     matchlength = m_search_->matchedLength;
224        m_search_->reset = FALSE;
225        if (m_search_->isForwardSearching == TRUE) {
226            int32_t textlength = m_search_->textLength;
227            if (offset == textlength || matchindex == textlength ||
228                (matchindex != USEARCH_DONE &&
229                matchindex + matchlength >= textlength)) {
230                // not enough characters to match
231                setMatchNotFound();
232                return USEARCH_DONE;
233            }
234        }
235        else {
236            // switching direction.
237            // if matchedIndex == USEARCH_DONE, it means that either a
238            // setOffset has been called or that previous ran off the text
239            // string. the iterator would have been set to offset 0 if a
240            // match is not found.
241            m_search_->isForwardSearching = TRUE;
242            if (m_search_->matchedIndex != USEARCH_DONE) {
243                // there's no need to set the collation element iterator
244                // the next call to next will set the offset.
245                return matchindex;
246            }
247        }
248
249        if (matchlength > 0) {
250            // if matchlength is 0 we are at the start of the iteration
251            if (m_search_->isOverlap) {
252                offset ++;
253            }
254            else {
255                offset += matchlength;
256            }
257        }
258        return handleNext(offset, status);
259    }
260    return USEARCH_DONE;
261}
262
263int32_t SearchIterator::previous(UErrorCode &status)
264{
265    if (U_SUCCESS(status)) {
266        int32_t offset;
267        if (m_search_->reset) {
268            offset                       = m_search_->textLength;
269            m_search_->isForwardSearching = FALSE;
270            m_search_->reset              = FALSE;
271            setOffset(offset, status);
272        }
273        else {
274            offset = getOffset();
275        }
276
277        int32_t matchindex = m_search_->matchedIndex;
278        if (m_search_->isForwardSearching == TRUE) {
279            // switching direction.
280            // if matchedIndex == USEARCH_DONE, it means that either a
281            // setOffset has been called or that next ran off the text
282            // string. the iterator would have been set to offset textLength if
283            // a match is not found.
284            m_search_->isForwardSearching = FALSE;
285            if (matchindex != USEARCH_DONE) {
286                return matchindex;
287            }
288        }
289        else {
290            if (offset == 0 || matchindex == 0) {
291                // not enough characters to match
292                setMatchNotFound();
293                return USEARCH_DONE;
294            }
295        }
296
297        if (matchindex != USEARCH_DONE) {
298            if (m_search_->isOverlap) {
299                matchindex += m_search_->matchedLength - 2;
300            }
301
302            return handlePrev(matchindex, status);
303        }
304
305        return handlePrev(offset, status);
306    }
307
308    return USEARCH_DONE;
309}
310
311void SearchIterator::reset()
312{
313    UErrorCode status = U_ZERO_ERROR;
314    setMatchNotFound();
315    setOffset(0, status);
316    m_search_->isOverlap          = FALSE;
317    m_search_->isCanonicalMatch   = FALSE;
318    m_search_->isForwardSearching = TRUE;
319    m_search_->reset              = TRUE;
320}
321
322// protected constructors and destructors -----------------------------
323
324SearchIterator::SearchIterator()
325{
326    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
327    m_search_->breakIter          = NULL;
328    m_search_->isOverlap          = FALSE;
329    m_search_->isCanonicalMatch   = FALSE;
330    m_search_->isForwardSearching = TRUE;
331    m_search_->reset              = TRUE;
332    m_search_->matchedIndex       = USEARCH_DONE;
333    m_search_->matchedLength      = 0;
334    m_search_->text               = NULL;
335    m_search_->textLength         = 0;
336    m_breakiterator_              = NULL;
337}
338
339SearchIterator::SearchIterator(const UnicodeString &text,
340                                     BreakIterator *breakiter) :
341                                     m_breakiterator_(breakiter),
342                                     m_text_(text)
343{
344    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
345    m_search_->breakIter          = NULL;
346    m_search_->isOverlap          = FALSE;
347    m_search_->isCanonicalMatch   = FALSE;
348    m_search_->isForwardSearching = TRUE;
349    m_search_->reset              = TRUE;
350    m_search_->matchedIndex       = USEARCH_DONE;
351    m_search_->matchedLength      = 0;
352    m_search_->text               = m_text_.getBuffer();
353    m_search_->textLength         = text.length();
354}
355
356SearchIterator::SearchIterator(CharacterIterator &text,
357                               BreakIterator     *breakiter) :
358                               m_breakiterator_(breakiter)
359{
360    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
361    m_search_->breakIter          = NULL;
362    m_search_->isOverlap          = FALSE;
363    m_search_->isCanonicalMatch   = FALSE;
364    m_search_->isForwardSearching = TRUE;
365    m_search_->reset              = TRUE;
366    m_search_->matchedIndex       = USEARCH_DONE;
367    m_search_->matchedLength      = 0;
368    text.getText(m_text_);
369    m_search_->text               = m_text_.getBuffer();
370    m_search_->textLength         = m_text_.length();
371    m_breakiterator_             = breakiter;
372}
373
374// protected methods ------------------------------------------------------
375
376SearchIterator & SearchIterator::operator=(const SearchIterator &that)
377{
378    if (this != &that) {
379        m_breakiterator_            = that.m_breakiterator_;
380        m_text_                     = that.m_text_;
381        m_search_->breakIter        = that.m_search_->breakIter;
382        m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
383        m_search_->isOverlap        = that.m_search_->isOverlap;
384        m_search_->matchedIndex     = that.m_search_->matchedIndex;
385        m_search_->matchedLength    = that.m_search_->matchedLength;
386        m_search_->text             = that.m_search_->text;
387        m_search_->textLength       = that.m_search_->textLength;
388    }
389    return *this;
390}
391
392void SearchIterator::setMatchLength(int32_t length)
393{
394    m_search_->matchedLength = length;
395}
396
397void SearchIterator::setMatchStart(int32_t position)
398{
399    m_search_->matchedIndex = position;
400}
401
402void SearchIterator::setMatchNotFound()
403{
404    setMatchStart(USEARCH_DONE);
405    setMatchLength(0);
406    UErrorCode status = U_ZERO_ERROR;
407    // by default no errors should be returned here since offsets are within
408    // range.
409    if (m_search_->isForwardSearching) {
410        setOffset(m_search_->textLength, status);
411    }
412    else {
413        setOffset(0, status);
414    }
415}
416
417
418U_NAMESPACE_END
419
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
421