1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5*   Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
6**********************************************************************
7*   Date        Name        Description
8*  03/22/2000   helena      Creation.
9**********************************************************************
10*/
11
12#include "unicode/utypes.h"
13
14#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
15
16#include "unicode/brkiter.h"
17#include "unicode/schriter.h"
18#include "unicode/search.h"
19#include "usrchimp.h"
20#include "cmemory.h"
21
22// public constructors and destructors -----------------------------------
23U_NAMESPACE_BEGIN
24
25SearchIterator::SearchIterator(const SearchIterator &other)
26    : UObject(other)
27{
28    m_breakiterator_            = other.m_breakiterator_;
29    m_text_                     = other.m_text_;
30    m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));
31    m_search_->breakIter        = other.m_search_->breakIter;
32    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
33    m_search_->isOverlap        = other.m_search_->isOverlap;
34    m_search_->elementComparisonType = other.m_search_->elementComparisonType;
35    m_search_->matchedIndex     = other.m_search_->matchedIndex;
36    m_search_->matchedLength    = other.m_search_->matchedLength;
37    m_search_->text             = other.m_search_->text;
38    m_search_->textLength       = other.m_search_->textLength;
39}
40
41SearchIterator::~SearchIterator()
42{
43    if (m_search_ != NULL) {
44        uprv_free(m_search_);
45    }
46}
47
48// public get and set methods ----------------------------------------
49
50void SearchIterator::setAttribute(USearchAttribute       attribute,
51                                  USearchAttributeValue  value,
52                                  UErrorCode            &status)
53{
54    if (U_SUCCESS(status)) {
55        switch (attribute)
56        {
57        case USEARCH_OVERLAP :
58            m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
59            break;
60        case USEARCH_CANONICAL_MATCH :
61            m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
62            break;
63        case USEARCH_ELEMENT_COMPARISON :
64            if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
65                m_search_->elementComparisonType = (int16_t)value;
66            } else {
67                m_search_->elementComparisonType = 0;
68            }
69            break;
70        default:
71            status = U_ILLEGAL_ARGUMENT_ERROR;
72        }
73    }
74    if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
75        status = U_ILLEGAL_ARGUMENT_ERROR;
76    }
77}
78
79USearchAttributeValue SearchIterator::getAttribute(
80                                          USearchAttribute  attribute) const
81{
82    switch (attribute) {
83    case USEARCH_OVERLAP :
84        return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
85    case USEARCH_CANONICAL_MATCH :
86        return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
87                                                                USEARCH_OFF);
88    case USEARCH_ELEMENT_COMPARISON :
89        {
90            int16_t value = m_search_->elementComparisonType;
91            if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
92                return (USearchAttributeValue)value;
93            } else {
94                return USEARCH_STANDARD_ELEMENT_COMPARISON;
95            }
96        }
97    default :
98        return USEARCH_DEFAULT;
99    }
100}
101
102int32_t SearchIterator::getMatchedStart() const
103{
104    return m_search_->matchedIndex;
105}
106
107int32_t SearchIterator::getMatchedLength() const
108{
109    return m_search_->matchedLength;
110}
111
112void SearchIterator::getMatchedText(UnicodeString &result) const
113{
114    int32_t matchedindex  = m_search_->matchedIndex;
115    int32_t     matchedlength = m_search_->matchedLength;
116    if (matchedindex != USEARCH_DONE && matchedlength != 0) {
117        result.setTo(m_search_->text + matchedindex, matchedlength);
118    }
119    else {
120        result.remove();
121    }
122}
123
124void SearchIterator::setBreakIterator(BreakIterator *breakiter,
125                                      UErrorCode &status)
126{
127    if (U_SUCCESS(status)) {
128#if 0
129        m_search_->breakIter = NULL;
130        // the c++ breakiterator may not make use of ubreakiterator.
131        // so we'll have to keep track of it ourselves.
132#else
133        // Well, gee... the Constructors that take a BreakIterator
134        // all cast the BreakIterator to a UBreakIterator and
135        // pass it to the corresponding usearch_openFromXXX
136        // routine, so there's no reason not to do this.
137        //
138        // Besides, a UBreakIterator is a BreakIterator, so
139        // any subclass of BreakIterator should work fine here...
140        m_search_->breakIter = (UBreakIterator *) breakiter;
141#endif
142
143        m_breakiterator_ = breakiter;
144    }
145}
146
147const BreakIterator * SearchIterator::getBreakIterator(void) const
148{
149    return m_breakiterator_;
150}
151
152void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
153{
154    if (U_SUCCESS(status)) {
155        if (text.length() == 0) {
156            status = U_ILLEGAL_ARGUMENT_ERROR;
157        }
158        else {
159            m_text_        = text;
160            m_search_->text = m_text_.getBuffer();
161            m_search_->textLength = m_text_.length();
162        }
163    }
164}
165
166void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
167{
168    if (U_SUCCESS(status)) {
169        text.getText(m_text_);
170        setText(m_text_, status);
171    }
172}
173
174const UnicodeString & SearchIterator::getText(void) const
175{
176    return m_text_;
177}
178
179// operator overloading ----------------------------------------------
180
181UBool SearchIterator::operator==(const SearchIterator &that) const
182{
183    if (this == &that) {
184        return TRUE;
185    }
186    return (m_breakiterator_            == that.m_breakiterator_ &&
187            m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
188            m_search_->isOverlap        == that.m_search_->isOverlap &&
189            m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
190            m_search_->matchedIndex     == that.m_search_->matchedIndex &&
191            m_search_->matchedLength    == that.m_search_->matchedLength &&
192            m_search_->textLength       == that.m_search_->textLength &&
193            getOffset() == that.getOffset() &&
194            (uprv_memcmp(m_search_->text, that.m_search_->text,
195                              m_search_->textLength * sizeof(UChar)) == 0));
196}
197
198// public methods ----------------------------------------------------
199
200int32_t SearchIterator::first(UErrorCode &status)
201{
202    if (U_FAILURE(status)) {
203        return USEARCH_DONE;
204    }
205    setOffset(0, status);
206    return handleNext(0, status);
207}
208
209int32_t SearchIterator::following(int32_t position,
210                                      UErrorCode &status)
211{
212    if (U_FAILURE(status)) {
213        return USEARCH_DONE;
214    }
215    setOffset(position, status);
216    return handleNext(position, status);
217}
218
219int32_t SearchIterator::last(UErrorCode &status)
220{
221    if (U_FAILURE(status)) {
222        return USEARCH_DONE;
223    }
224    setOffset(m_search_->textLength, status);
225    return handlePrev(m_search_->textLength, status);
226}
227
228int32_t SearchIterator::preceding(int32_t position,
229                                      UErrorCode &status)
230{
231    if (U_FAILURE(status)) {
232        return USEARCH_DONE;
233    }
234    setOffset(position, status);
235    return handlePrev(position, status);
236}
237
238int32_t SearchIterator::next(UErrorCode &status)
239{
240    if (U_SUCCESS(status)) {
241        int32_t offset = getOffset();
242        int32_t matchindex  = m_search_->matchedIndex;
243        int32_t     matchlength = m_search_->matchedLength;
244        m_search_->reset = FALSE;
245        if (m_search_->isForwardSearching == TRUE) {
246            int32_t textlength = m_search_->textLength;
247            if (offset == textlength || matchindex == textlength ||
248                (matchindex != USEARCH_DONE &&
249                matchindex + matchlength >= textlength)) {
250                // not enough characters to match
251                setMatchNotFound();
252                return USEARCH_DONE;
253            }
254        }
255        else {
256            // switching direction.
257            // if matchedIndex == USEARCH_DONE, it means that either a
258            // setOffset has been called or that previous ran off the text
259            // string. the iterator would have been set to offset 0 if a
260            // match is not found.
261            m_search_->isForwardSearching = TRUE;
262            if (m_search_->matchedIndex != USEARCH_DONE) {
263                // there's no need to set the collation element iterator
264                // the next call to next will set the offset.
265                return matchindex;
266            }
267        }
268
269        if (matchlength > 0) {
270            // if matchlength is 0 we are at the start of the iteration
271            if (m_search_->isOverlap) {
272                offset ++;
273            }
274            else {
275                offset += matchlength;
276            }
277        }
278        return handleNext(offset, status);
279    }
280    return USEARCH_DONE;
281}
282
283int32_t SearchIterator::previous(UErrorCode &status)
284{
285    if (U_SUCCESS(status)) {
286        int32_t offset;
287        if (m_search_->reset) {
288            offset                       = m_search_->textLength;
289            m_search_->isForwardSearching = FALSE;
290            m_search_->reset              = FALSE;
291            setOffset(offset, status);
292        }
293        else {
294            offset = getOffset();
295        }
296
297        int32_t matchindex = m_search_->matchedIndex;
298        if (m_search_->isForwardSearching == TRUE) {
299            // switching direction.
300            // if matchedIndex == USEARCH_DONE, it means that either a
301            // setOffset has been called or that next ran off the text
302            // string. the iterator would have been set to offset textLength if
303            // a match is not found.
304            m_search_->isForwardSearching = FALSE;
305            if (matchindex != USEARCH_DONE) {
306                return matchindex;
307            }
308        }
309        else {
310            if (offset == 0 || matchindex == 0) {
311                // not enough characters to match
312                setMatchNotFound();
313                return USEARCH_DONE;
314            }
315        }
316
317        if (matchindex != USEARCH_DONE) {
318            if (m_search_->isOverlap) {
319                matchindex += m_search_->matchedLength - 2;
320            }
321
322            return handlePrev(matchindex, status);
323        }
324
325        return handlePrev(offset, status);
326    }
327
328    return USEARCH_DONE;
329}
330
331void SearchIterator::reset()
332{
333    UErrorCode status = U_ZERO_ERROR;
334    setMatchNotFound();
335    setOffset(0, status);
336    m_search_->isOverlap          = FALSE;
337    m_search_->isCanonicalMatch   = FALSE;
338    m_search_->elementComparisonType = 0;
339    m_search_->isForwardSearching = TRUE;
340    m_search_->reset              = TRUE;
341}
342
343// protected constructors and destructors -----------------------------
344
345SearchIterator::SearchIterator()
346{
347    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
348    m_search_->breakIter          = NULL;
349    m_search_->isOverlap          = FALSE;
350    m_search_->isCanonicalMatch   = FALSE;
351    m_search_->elementComparisonType = 0;
352    m_search_->isForwardSearching = TRUE;
353    m_search_->reset              = TRUE;
354    m_search_->matchedIndex       = USEARCH_DONE;
355    m_search_->matchedLength      = 0;
356    m_search_->text               = NULL;
357    m_search_->textLength         = 0;
358    m_breakiterator_              = NULL;
359}
360
361SearchIterator::SearchIterator(const UnicodeString &text,
362                                     BreakIterator *breakiter) :
363                                     m_breakiterator_(breakiter),
364                                     m_text_(text)
365{
366    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
367    m_search_->breakIter          = NULL;
368    m_search_->isOverlap          = FALSE;
369    m_search_->isCanonicalMatch   = FALSE;
370    m_search_->elementComparisonType = 0;
371    m_search_->isForwardSearching = TRUE;
372    m_search_->reset              = TRUE;
373    m_search_->matchedIndex       = USEARCH_DONE;
374    m_search_->matchedLength      = 0;
375    m_search_->text               = m_text_.getBuffer();
376    m_search_->textLength         = text.length();
377}
378
379SearchIterator::SearchIterator(CharacterIterator &text,
380                               BreakIterator     *breakiter) :
381                               m_breakiterator_(breakiter)
382{
383    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
384    m_search_->breakIter          = NULL;
385    m_search_->isOverlap          = FALSE;
386    m_search_->isCanonicalMatch   = FALSE;
387    m_search_->elementComparisonType = 0;
388    m_search_->isForwardSearching = TRUE;
389    m_search_->reset              = TRUE;
390    m_search_->matchedIndex       = USEARCH_DONE;
391    m_search_->matchedLength      = 0;
392    text.getText(m_text_);
393    m_search_->text               = m_text_.getBuffer();
394    m_search_->textLength         = m_text_.length();
395    m_breakiterator_             = breakiter;
396}
397
398// protected methods ------------------------------------------------------
399
400SearchIterator & SearchIterator::operator=(const SearchIterator &that)
401{
402    if (this != &that) {
403        m_breakiterator_            = that.m_breakiterator_;
404        m_text_                     = that.m_text_;
405        m_search_->breakIter        = that.m_search_->breakIter;
406        m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
407        m_search_->isOverlap        = that.m_search_->isOverlap;
408        m_search_->elementComparisonType = that.m_search_->elementComparisonType;
409        m_search_->matchedIndex     = that.m_search_->matchedIndex;
410        m_search_->matchedLength    = that.m_search_->matchedLength;
411        m_search_->text             = that.m_search_->text;
412        m_search_->textLength       = that.m_search_->textLength;
413    }
414    return *this;
415}
416
417void SearchIterator::setMatchLength(int32_t length)
418{
419    m_search_->matchedLength = length;
420}
421
422void SearchIterator::setMatchStart(int32_t position)
423{
424    m_search_->matchedIndex = position;
425}
426
427void SearchIterator::setMatchNotFound()
428{
429    setMatchStart(USEARCH_DONE);
430    setMatchLength(0);
431    UErrorCode status = U_ZERO_ERROR;
432    // by default no errors should be returned here since offsets are within
433    // range.
434    if (m_search_->isForwardSearching) {
435        setOffset(m_search_->textLength, status);
436    }
437    else {
438        setOffset(0, status);
439    }
440}
441
442
443U_NAMESPACE_END
444
445#endif /* #if !UCONFIG_NO_COLLATION */
446