1/*
2**********************************************************************
3*   Copyright (C) 2001-2014 IBM and others. All rights reserved.
4**********************************************************************
5*   Date        Name        Description
6*  03/22/2000   helena      Creation.
7**********************************************************************
8*/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/stsearch.h"
15#include "usrchimp.h"
16#include "cmemory.h"
17
18U_NAMESPACE_BEGIN
19
20UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
21
22// public constructors and destructors -----------------------------------
23
24StringSearch::StringSearch(const UnicodeString &pattern,
25                           const UnicodeString &text,
26                           const Locale        &locale,
27                                 BreakIterator *breakiter,
28                                 UErrorCode    &status) :
29                           SearchIterator(text, breakiter),
30                           m_pattern_(pattern)
31{
32    if (U_FAILURE(status)) {
33        m_strsrch_ = NULL;
34        return;
35    }
36
37    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
38                              m_text_.getBuffer(), m_text_.length(),
39                              locale.getName(), (UBreakIterator *)breakiter,
40                              &status);
41    uprv_free(m_search_);
42    m_search_ = NULL;
43
44    if (U_SUCCESS(status)) {
45        // m_search_ has been created by the base SearchIterator class
46        m_search_        = m_strsrch_->search;
47    }
48}
49
50StringSearch::StringSearch(const UnicodeString     &pattern,
51                           const UnicodeString     &text,
52                                 RuleBasedCollator *coll,
53                                 BreakIterator     *breakiter,
54                                 UErrorCode        &status) :
55                           SearchIterator(text, breakiter),
56                           m_pattern_(pattern)
57{
58    if (U_FAILURE(status)) {
59        m_strsrch_ = NULL;
60        return;
61    }
62    if (coll == NULL) {
63        status     = U_ILLEGAL_ARGUMENT_ERROR;
64        m_strsrch_ = NULL;
65        return;
66    }
67    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
68                                          m_pattern_.length(),
69                                          m_text_.getBuffer(),
70                                          m_text_.length(), coll->toUCollator(),
71                                          (UBreakIterator *)breakiter,
72                                          &status);
73    uprv_free(m_search_);
74    m_search_ = NULL;
75
76    if (U_SUCCESS(status)) {
77        // m_search_ has been created by the base SearchIterator class
78        m_search_ = m_strsrch_->search;
79    }
80}
81
82StringSearch::StringSearch(const UnicodeString     &pattern,
83                                 CharacterIterator &text,
84                           const Locale            &locale,
85                                 BreakIterator     *breakiter,
86                                 UErrorCode        &status) :
87                           SearchIterator(text, breakiter),
88                           m_pattern_(pattern)
89{
90    if (U_FAILURE(status)) {
91        m_strsrch_ = NULL;
92        return;
93    }
94    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
95                              m_text_.getBuffer(), m_text_.length(),
96                              locale.getName(), (UBreakIterator *)breakiter,
97                              &status);
98    uprv_free(m_search_);
99    m_search_ = NULL;
100
101    if (U_SUCCESS(status)) {
102        // m_search_ has been created by the base SearchIterator class
103        m_search_ = m_strsrch_->search;
104    }
105}
106
107StringSearch::StringSearch(const UnicodeString     &pattern,
108                                 CharacterIterator &text,
109                                 RuleBasedCollator *coll,
110                                 BreakIterator     *breakiter,
111                                 UErrorCode        &status) :
112                           SearchIterator(text, breakiter),
113                           m_pattern_(pattern)
114{
115    if (U_FAILURE(status)) {
116        m_strsrch_ = NULL;
117        return;
118    }
119    if (coll == NULL) {
120        status     = U_ILLEGAL_ARGUMENT_ERROR;
121        m_strsrch_ = NULL;
122        return;
123    }
124    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
125                                          m_pattern_.length(),
126                                          m_text_.getBuffer(),
127                                          m_text_.length(), coll->toUCollator(),
128                                          (UBreakIterator *)breakiter,
129                                          &status);
130    uprv_free(m_search_);
131    m_search_ = NULL;
132
133    if (U_SUCCESS(status)) {
134        // m_search_ has been created by the base SearchIterator class
135        m_search_ = m_strsrch_->search;
136    }
137}
138
139StringSearch::StringSearch(const StringSearch &that) :
140                       SearchIterator(that.m_text_, that.m_breakiterator_),
141                       m_pattern_(that.m_pattern_)
142{
143    UErrorCode status = U_ZERO_ERROR;
144
145    // Free m_search_ from the superclass
146    uprv_free(m_search_);
147    m_search_ = NULL;
148
149    if (that.m_strsrch_ == NULL) {
150        // This was not a good copy
151        m_strsrch_ = NULL;
152    }
153    else {
154        // Make a deep copy
155        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
156                                              m_pattern_.length(),
157                                              m_text_.getBuffer(),
158                                              m_text_.length(),
159                                              that.m_strsrch_->collator,
160                                             (UBreakIterator *)that.m_breakiterator_,
161                                              &status);
162        if (U_SUCCESS(status)) {
163            // m_search_ has been created by the base SearchIterator class
164            m_search_        = m_strsrch_->search;
165        }
166    }
167}
168
169StringSearch::~StringSearch()
170{
171    if (m_strsrch_ != NULL) {
172        usearch_close(m_strsrch_);
173        m_search_ = NULL;
174    }
175}
176
177StringSearch *
178StringSearch::clone() const {
179    return new StringSearch(*this);
180}
181
182// operator overloading ---------------------------------------------
183StringSearch & StringSearch::operator=(const StringSearch &that)
184{
185    if ((*this) != that) {
186        UErrorCode status = U_ZERO_ERROR;
187        m_text_          = that.m_text_;
188        m_breakiterator_ = that.m_breakiterator_;
189        m_pattern_       = that.m_pattern_;
190        // all m_search_ in the parent class is linked up with m_strsrch_
191        usearch_close(m_strsrch_);
192        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
193                                              m_pattern_.length(),
194                                              m_text_.getBuffer(),
195                                              m_text_.length(),
196                                              that.m_strsrch_->collator,
197                                              NULL, &status);
198        // Check null pointer
199        if (m_strsrch_ != NULL) {
200            m_search_ = m_strsrch_->search;
201        }
202    }
203    return *this;
204}
205
206UBool StringSearch::operator==(const SearchIterator &that) const
207{
208    if (this == &that) {
209        return TRUE;
210    }
211    if (SearchIterator::operator ==(that)) {
212        StringSearch &thatsrch = (StringSearch &)that;
213        return (this->m_pattern_ == thatsrch.m_pattern_ &&
214                this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
215    }
216    return FALSE;
217}
218
219// public get and set methods ----------------------------------------
220
221void StringSearch::setOffset(int32_t position, UErrorCode &status)
222{
223    // status checked in usearch_setOffset
224    usearch_setOffset(m_strsrch_, position, &status);
225}
226
227int32_t StringSearch::getOffset(void) const
228{
229    return usearch_getOffset(m_strsrch_);
230}
231
232void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
233{
234    if (U_SUCCESS(status)) {
235        m_text_ = text;
236        usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
237    }
238}
239
240void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
241{
242    if (U_SUCCESS(status)) {
243        text.getText(m_text_);
244        usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
245    }
246}
247
248RuleBasedCollator * StringSearch::getCollator() const
249{
250    // Note the const_cast. It would be cleaner if this const method returned a const collator.
251    return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
252}
253
254void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
255{
256    if (U_SUCCESS(status)) {
257        usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
258    }
259}
260
261void StringSearch::setPattern(const UnicodeString &pattern,
262                                    UErrorCode    &status)
263{
264    if (U_SUCCESS(status)) {
265        m_pattern_ = pattern;
266        usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
267                           &status);
268    }
269}
270
271const UnicodeString & StringSearch::getPattern() const
272{
273    return m_pattern_;
274}
275
276// public methods ----------------------------------------------------
277
278void StringSearch::reset()
279{
280    usearch_reset(m_strsrch_);
281}
282
283SearchIterator * StringSearch::safeClone(void) const
284{
285    UErrorCode status = U_ZERO_ERROR;
286    StringSearch *result = new StringSearch(m_pattern_, m_text_,
287                                            getCollator(),
288                                            m_breakiterator_,
289                                            status);
290    /* test for NULL */
291    if (result == 0) {
292        status = U_MEMORY_ALLOCATION_ERROR;
293        return 0;
294    }
295    result->setOffset(getOffset(), status);
296    result->setMatchStart(m_strsrch_->search->matchedIndex);
297    result->setMatchLength(m_strsrch_->search->matchedLength);
298    if (U_FAILURE(status)) {
299        return NULL;
300    }
301    return result;
302}
303
304// protected method -------------------------------------------------
305
306int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
307{
308    // values passed here are already in the pre-shift position
309    if (U_SUCCESS(status)) {
310        if (m_strsrch_->pattern.CELength == 0) {
311            m_search_->matchedIndex =
312                                    m_search_->matchedIndex == USEARCH_DONE ?
313                                    getOffset() : m_search_->matchedIndex + 1;
314            m_search_->matchedLength = 0;
315            ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
316                           &status);
317            if (m_search_->matchedIndex == m_search_->textLength) {
318                m_search_->matchedIndex = USEARCH_DONE;
319            }
320        }
321        else {
322            // looking at usearch.cpp, this part is shifted out to
323            // StringSearch instead of SearchIterator because m_strsrch_ is
324            // not accessible in SearchIterator
325#if 0
326            if (position + m_strsrch_->pattern.defaultShiftSize
327                > m_search_->textLength) {
328                setMatchNotFound();
329                return USEARCH_DONE;
330            }
331#endif
332            if (m_search_->matchedLength <= 0) {
333                // the flipping direction issue has already been handled
334                // in next()
335                // for boundary check purposes. this will ensure that the
336                // next match will not preceed the current offset
337                // note search->matchedIndex will always be set to something
338                // in the code
339                m_search_->matchedIndex = position - 1;
340            }
341
342            ucol_setOffset(m_strsrch_->textIter, position, &status);
343
344#if 0
345            for (;;) {
346                if (m_search_->isCanonicalMatch) {
347                    // can't use exact here since extra accents are allowed.
348                    usearch_handleNextCanonical(m_strsrch_, &status);
349                }
350                else {
351                    usearch_handleNextExact(m_strsrch_, &status);
352                }
353                if (U_FAILURE(status)) {
354                    return USEARCH_DONE;
355                }
356                if (m_breakiterator_ == NULL
357#if !UCONFIG_NO_BREAK_ITERATION
358                    ||
359                    m_search_->matchedIndex == USEARCH_DONE ||
360                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
361                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
362                                                  m_search_->matchedLength))
363#endif
364                ) {
365                    if (m_search_->matchedIndex == USEARCH_DONE) {
366                        ucol_setOffset(m_strsrch_->textIter,
367                                       m_search_->textLength, &status);
368                    }
369                    else {
370                        ucol_setOffset(m_strsrch_->textIter,
371                                       m_search_->matchedIndex, &status);
372                    }
373                    return m_search_->matchedIndex;
374                }
375            }
376#else
377            // if m_strsrch_->breakIter is always the same as m_breakiterator_
378            // then we don't need to check the match boundaries here because
379            // usearch_handleNextXXX will already have done it.
380            if (m_search_->isCanonicalMatch) {
381            	// *could* actually use exact here 'cause no extra accents allowed...
382            	usearch_handleNextCanonical(m_strsrch_, &status);
383            } else {
384            	usearch_handleNextExact(m_strsrch_, &status);
385            }
386
387            if (U_FAILURE(status)) {
388            	return USEARCH_DONE;
389            }
390
391            if (m_search_->matchedIndex == USEARCH_DONE) {
392            	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
393            } else {
394            	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
395            }
396
397            return m_search_->matchedIndex;
398#endif
399        }
400    }
401    return USEARCH_DONE;
402}
403
404int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
405{
406    // values passed here are already in the pre-shift position
407    if (U_SUCCESS(status)) {
408        if (m_strsrch_->pattern.CELength == 0) {
409            m_search_->matchedIndex =
410                  (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
411                   m_search_->matchedIndex);
412            if (m_search_->matchedIndex == 0) {
413                setMatchNotFound();
414            }
415            else {
416                m_search_->matchedIndex --;
417                ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
418                               &status);
419                m_search_->matchedLength = 0;
420            }
421        }
422        else {
423            // looking at usearch.cpp, this part is shifted out to
424            // StringSearch instead of SearchIterator because m_strsrch_ is
425            // not accessible in SearchIterator
426#if 0
427            if (!m_search_->isOverlap &&
428                position - m_strsrch_->pattern.defaultShiftSize < 0) {
429                setMatchNotFound();
430                return USEARCH_DONE;
431            }
432
433            for (;;) {
434                if (m_search_->isCanonicalMatch) {
435                    // can't use exact here since extra accents are allowed.
436                    usearch_handlePreviousCanonical(m_strsrch_, &status);
437                }
438                else {
439                    usearch_handlePreviousExact(m_strsrch_, &status);
440                }
441                if (U_FAILURE(status)) {
442                    return USEARCH_DONE;
443                }
444                if (m_breakiterator_ == NULL
445#if !UCONFIG_NO_BREAK_ITERATION
446                    ||
447                    m_search_->matchedIndex == USEARCH_DONE ||
448                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
449                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
450                                                  m_search_->matchedLength))
451#endif
452                ) {
453                    return m_search_->matchedIndex;
454                }
455            }
456#else
457            ucol_setOffset(m_strsrch_->textIter, position, &status);
458
459            if (m_search_->isCanonicalMatch) {
460            	// *could* use exact match here since extra accents *not* allowed!
461            	usearch_handlePreviousCanonical(m_strsrch_, &status);
462            } else {
463            	usearch_handlePreviousExact(m_strsrch_, &status);
464            }
465
466            if (U_FAILURE(status)) {
467            	return USEARCH_DONE;
468            }
469
470            return m_search_->matchedIndex;
471#endif
472        }
473
474        return m_search_->matchedIndex;
475    }
476    return USEARCH_DONE;
477}
478
479U_NAMESPACE_END
480
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
482