1/*
2**********************************************************************
3*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
4**********************************************************************
5*   Date        Name        Description
6*  03/22/2000   helena      Creation.
7**********************************************************************
8*/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/stsearch.h"
15#include "usrchimp.h"
16#include "cmemory.h"
17
18U_NAMESPACE_BEGIN
19
// NOTE(review): presumably expands to the class-ID based RTTI member
// definitions for StringSearch; the macro is defined outside this file, so
// confirm against its declaration.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
21
22// public constructors and destructors -----------------------------------
23
24StringSearch::StringSearch(const UnicodeString &pattern,
25                           const UnicodeString &text,
26                           const Locale        &locale,
27                                 BreakIterator *breakiter,
28                                 UErrorCode    &status) :
29                           SearchIterator(text, breakiter),
30                           m_collator_(),
31                           m_pattern_(pattern)
32{
33    if (U_FAILURE(status)) {
34        m_strsrch_ = NULL;
35        return;
36    }
37
38    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
39                              m_text_.getBuffer(), m_text_.length(),
40                              locale.getName(), (UBreakIterator *)breakiter,
41                              &status);
42    uprv_free(m_search_);
43    m_search_ = NULL;
44
45    // !!! dlf m_collator_ is an odd beast.  basically it is an aliasing
46    // wrapper around the internal collator and rules, which (here) are
47    // owned by this stringsearch object.  this means 1) it's destructor
48    // _should not_ delete the ucollator or rules, and 2) changes made
49    // to the exposed collator (setStrength etc) _should_ modify the
50    // ucollator.  thus the collator is not a copy-on-write alias, and it
51    // needs to distinguish itself not merely from 'stand alone' colators
52    // but also from copy-on-write ones.  it needs additional state, which
53    // setUCollator should set.
54
55    if (U_SUCCESS(status)) {
56        // Alias the collator
57        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
58        // m_search_ has been created by the base SearchIterator class
59        m_search_        = m_strsrch_->search;
60    }
61}
62
63StringSearch::StringSearch(const UnicodeString     &pattern,
64                           const UnicodeString     &text,
65                                 RuleBasedCollator *coll,
66                                 BreakIterator     *breakiter,
67                                 UErrorCode        &status) :
68                           SearchIterator(text, breakiter),
69                           m_collator_(),
70                           m_pattern_(pattern)
71{
72    if (U_FAILURE(status)) {
73        m_strsrch_ = NULL;
74        return;
75    }
76    if (coll == NULL) {
77        status     = U_ILLEGAL_ARGUMENT_ERROR;
78        m_strsrch_ = NULL;
79        return;
80    }
81    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
82                                          m_pattern_.length(),
83                                          m_text_.getBuffer(),
84                                          m_text_.length(), coll->ucollator,
85                                          (UBreakIterator *)breakiter,
86                                          &status);
87    uprv_free(m_search_);
88    m_search_ = NULL;
89
90    if (U_SUCCESS(status)) {
91        // Alias the collator
92        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
93        // m_search_ has been created by the base SearchIterator class
94        m_search_ = m_strsrch_->search;
95    }
96}
97
98StringSearch::StringSearch(const UnicodeString     &pattern,
99                                 CharacterIterator &text,
100                           const Locale            &locale,
101                                 BreakIterator     *breakiter,
102                                 UErrorCode        &status) :
103                           SearchIterator(text, breakiter),
104                           m_collator_(),
105                           m_pattern_(pattern)
106{
107    if (U_FAILURE(status)) {
108        m_strsrch_ = NULL;
109        return;
110    }
111    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
112                              m_text_.getBuffer(), m_text_.length(),
113                              locale.getName(), (UBreakIterator *)breakiter,
114                              &status);
115    uprv_free(m_search_);
116    m_search_ = NULL;
117
118    if (U_SUCCESS(status)) {
119        // Alias the collator
120        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
121        // m_search_ has been created by the base SearchIterator class
122        m_search_ = m_strsrch_->search;
123    }
124}
125
126StringSearch::StringSearch(const UnicodeString     &pattern,
127                                 CharacterIterator &text,
128                                 RuleBasedCollator *coll,
129                                 BreakIterator     *breakiter,
130                                 UErrorCode        &status) :
131                           SearchIterator(text, breakiter),
132                           m_collator_(),
133                           m_pattern_(pattern)
134{
135    if (U_FAILURE(status)) {
136        m_strsrch_ = NULL;
137        return;
138    }
139    if (coll == NULL) {
140        status     = U_ILLEGAL_ARGUMENT_ERROR;
141        m_strsrch_ = NULL;
142        return;
143    }
144    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
145                                          m_pattern_.length(),
146                                          m_text_.getBuffer(),
147                                          m_text_.length(), coll->ucollator,
148                                          (UBreakIterator *)breakiter,
149                                          &status);
150    uprv_free(m_search_);
151    m_search_ = NULL;
152
153    if (U_SUCCESS(status)) {
154        // Alias the collator
155        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
156        // m_search_ has been created by the base SearchIterator class
157        m_search_ = m_strsrch_->search;
158    }
159}
160
161StringSearch::StringSearch(const StringSearch &that) :
162                       SearchIterator(that.m_text_, that.m_breakiterator_),
163                       m_collator_(),
164                       m_pattern_(that.m_pattern_)
165{
166    UErrorCode status = U_ZERO_ERROR;
167
168    // Free m_search_ from the superclass
169    uprv_free(m_search_);
170    m_search_ = NULL;
171
172    if (that.m_strsrch_ == NULL) {
173        // This was not a good copy
174        m_strsrch_ = NULL;
175    }
176    else {
177        // Make a deep copy
178        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
179                                              m_pattern_.length(),
180                                              m_text_.getBuffer(),
181                                              m_text_.length(),
182                                              that.m_strsrch_->collator,
183                                             (UBreakIterator *)that.m_breakiterator_,
184                                              &status);
185        if (U_SUCCESS(status)) {
186            // Alias the collator
187            m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
188            // m_search_ has been created by the base SearchIterator class
189            m_search_        = m_strsrch_->search;
190        }
191    }
192}
193
194StringSearch::~StringSearch()
195{
196    if (m_strsrch_ != NULL) {
197        usearch_close(m_strsrch_);
198        m_search_ = NULL;
199    }
200}
201
202StringSearch *
203StringSearch::clone() const {
204    return new StringSearch(*this);
205}
206
207// operator overloading ---------------------------------------------
208StringSearch & StringSearch::operator=(const StringSearch &that)
209{
210    if ((*this) != that) {
211        UErrorCode status = U_ZERO_ERROR;
212        m_text_          = that.m_text_;
213        m_breakiterator_ = that.m_breakiterator_;
214        m_pattern_       = that.m_pattern_;
215        // all m_search_ in the parent class is linked up with m_strsrch_
216        usearch_close(m_strsrch_);
217        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
218                                              m_pattern_.length(),
219                                              m_text_.getBuffer(),
220                                              m_text_.length(),
221                                              that.m_strsrch_->collator,
222                                              NULL, &status);
223        // Check null pointer
224        if (m_strsrch_ != NULL) {
225	        // Alias the collator
226	        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
227	        m_search_ = m_strsrch_->search;
228        }
229    }
230    return *this;
231}
232
233UBool StringSearch::operator==(const SearchIterator &that) const
234{
235    if (this == &that) {
236        return TRUE;
237    }
238    if (SearchIterator::operator ==(that)) {
239        StringSearch &thatsrch = (StringSearch &)that;
240        return (this->m_pattern_ == thatsrch.m_pattern_ &&
241                this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
242    }
243    return FALSE;
244}
245
246// public get and set methods ----------------------------------------
247
248void StringSearch::setOffset(int32_t position, UErrorCode &status)
249{
250    // status checked in usearch_setOffset
251    usearch_setOffset(m_strsrch_, position, &status);
252}
253
254int32_t StringSearch::getOffset(void) const
255{
256    return usearch_getOffset(m_strsrch_);
257}
258
259void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
260{
261    if (U_SUCCESS(status)) {
262        m_text_ = text;
263        usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
264    }
265}
266
267void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
268{
269    if (U_SUCCESS(status)) {
270        text.getText(m_text_);
271        usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
272    }
273}
274
275RuleBasedCollator * StringSearch::getCollator() const
276{
277    return (RuleBasedCollator *)&m_collator_;
278}
279
280void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
281{
282    if (U_SUCCESS(status)) {
283        usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
284        // Alias the collator
285        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
286    }
287}
288
289void StringSearch::setPattern(const UnicodeString &pattern,
290                                    UErrorCode    &status)
291{
292    if (U_SUCCESS(status)) {
293        m_pattern_ = pattern;
294        usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
295                           &status);
296    }
297}
298
299const UnicodeString & StringSearch::getPattern() const
300{
301    return m_pattern_;
302}
303
304// public methods ----------------------------------------------------
305
306void StringSearch::reset()
307{
308    usearch_reset(m_strsrch_);
309}
310
311SearchIterator * StringSearch::safeClone(void) const
312{
313    UErrorCode status = U_ZERO_ERROR;
314    StringSearch *result = new StringSearch(m_pattern_, m_text_,
315                                            (RuleBasedCollator *)&m_collator_,
316                                            m_breakiterator_,
317                                            status);
318    /* test for NULL */
319    if (result == 0) {
320        status = U_MEMORY_ALLOCATION_ERROR;
321        return 0;
322    }
323    result->setOffset(getOffset(), status);
324    result->setMatchStart(m_strsrch_->search->matchedIndex);
325    result->setMatchLength(m_strsrch_->search->matchedLength);
326    if (U_FAILURE(status)) {
327        return NULL;
328    }
329    return result;
330}
331
332// protected method -------------------------------------------------
333
// Forward-search step.
//
// position - text offset at which matching resumes; for a non-empty pattern
//            it is written to the text collation element iterator before the
//            native handler runs.
// status   - in/out ICU error code; nothing is done if it already signals a
//            failure.
//
// Returns m_search_->matchedIndex after a non-empty-pattern search, or
// USEARCH_DONE on failure.
//
// NOTE(review): the empty-pattern branch (CELength == 0) updates
// matchedIndex/matchedLength but contains no return statement, so control
// reaches the final `return USEARCH_DONE` even when a match position was
// just recorded.  handlePrev's corresponding branch returns
// m_search_->matchedIndex instead.  Confirm whether the asymmetry is
// intended.
int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
{
    // values passed here are already in the pre-shift position
    if (U_SUCCESS(status)) {
        if (m_strsrch_->pattern.CELength == 0) {
            // Empty pattern: step one position past the previous match, or
            // start at the current offset if there was no previous match.
            m_search_->matchedIndex =
                                    m_search_->matchedIndex == USEARCH_DONE ?
                                    getOffset() : m_search_->matchedIndex + 1;
            m_search_->matchedLength = 0;
            ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
                           &status);
            // Reaching the end of the text ends the iteration.
            if (m_search_->matchedIndex == m_search_->textLength) {
                m_search_->matchedIndex = USEARCH_DONE;
            }
        }
        else {
            // looking at usearch.cpp, this part is shifted out to
            // StringSearch instead of SearchIterator because m_strsrch_ is
            // not accessible in SearchIterator
#if 0
            if (position + m_strsrch_->pattern.defaultShiftSize
                > m_search_->textLength) {
                setMatchNotFound();
                return USEARCH_DONE;
            }
#endif
            if (m_search_->matchedLength <= 0) {
                // the flipping direction issue has already been handled
                // in next()
                // for boundary check purposes. this will ensure that the
                // next match will not precede the current offset
                // note search->matchedIndex will always be set to something
                // in the code
                m_search_->matchedIndex = position - 1;
            }

            ucol_setOffset(m_strsrch_->textIter, position, &status);

            // The first branch below is compiled out; it is an older loop
            // that re-checked match boundaries against m_breakiterator_.
#if 0
            for (;;) {
                if (m_search_->isCanonicalMatch) {
                    // can't use exact here since extra accents are allowed.
                    usearch_handleNextCanonical(m_strsrch_, &status);
                }
                else {
                    usearch_handleNextExact(m_strsrch_, &status);
                }
                if (U_FAILURE(status)) {
                    return USEARCH_DONE;
                }
                if (m_breakiterator_ == NULL
#if !UCONFIG_NO_BREAK_ITERATION
                    ||
                    m_search_->matchedIndex == USEARCH_DONE ||
                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
                                                  m_search_->matchedLength))
#endif
                ) {
                    if (m_search_->matchedIndex == USEARCH_DONE) {
                        ucol_setOffset(m_strsrch_->textIter,
                                       m_search_->textLength, &status);
                    }
                    else {
                        ucol_setOffset(m_strsrch_->textIter,
                                       m_search_->matchedIndex, &status);
                    }
                    return m_search_->matchedIndex;
                }
            }
#else
            // if m_strsrch_->breakIter is always the same as m_breakiterator_
            // then we don't need to check the match boundaries here because
            // usearch_handleNextXXX will already have done it.
            if (m_search_->isCanonicalMatch) {
            	// *could* actually use exact here 'cause no extra accents allowed...
            	usearch_handleNextCanonical(m_strsrch_, &status);
            } else {
            	usearch_handleNextExact(m_strsrch_, &status);
            }

            if (U_FAILURE(status)) {
            	return USEARCH_DONE;
            }

            // Leave the text iterator at the match start, or at the end of
            // the text when nothing was found.
            if (m_search_->matchedIndex == USEARCH_DONE) {
            	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
            } else {
            	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
            }

            return m_search_->matchedIndex;
#endif
        }
    }
    return USEARCH_DONE;
}
431
// Backward-search step.
//
// position - text offset from which the backward search starts; for a
//            non-empty pattern it is written to the text collation element
//            iterator before the native handler runs.
// status   - in/out ICU error code; nothing is done if it already signals a
//            failure.
//
// Returns m_search_->matchedIndex as left by this step, or USEARCH_DONE when
// `status` is a failure on entry or after the native handler ran.
int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
{
    // values passed here are already in the pre-shift position
    if (U_SUCCESS(status)) {
        if (m_strsrch_->pattern.CELength == 0) {
            // Empty pattern: step one position back from the previous match,
            // or from the current offset if there was no previous match.
            m_search_->matchedIndex =
                  (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
                   m_search_->matchedIndex);
            if (m_search_->matchedIndex == 0) {
                // Already at the start of the text: nothing further back.
                setMatchNotFound();
            }
            else {
                m_search_->matchedIndex --;
                ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
                               &status);
                m_search_->matchedLength = 0;
            }
        }
        else {
            // looking at usearch.cpp, this part is shifted out to
            // StringSearch instead of SearchIterator because m_strsrch_ is
            // not accessible in SearchIterator

            // The first branch below is compiled out; it is an older loop
            // that re-checked match boundaries against m_breakiterator_.
#if 0
            if (!m_search_->isOverlap &&
                position - m_strsrch_->pattern.defaultShiftSize < 0) {
                setMatchNotFound();
                return USEARCH_DONE;
            }

            for (;;) {
                if (m_search_->isCanonicalMatch) {
                    // can't use exact here since extra accents are allowed.
                    usearch_handlePreviousCanonical(m_strsrch_, &status);
                }
                else {
                    usearch_handlePreviousExact(m_strsrch_, &status);
                }
                if (U_FAILURE(status)) {
                    return USEARCH_DONE;
                }
                if (m_breakiterator_ == NULL
#if !UCONFIG_NO_BREAK_ITERATION
                    ||
                    m_search_->matchedIndex == USEARCH_DONE ||
                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
                                                  m_search_->matchedLength))
#endif
                ) {
                    return m_search_->matchedIndex;
                }
            }
#else
            ucol_setOffset(m_strsrch_->textIter, position, &status);

            if (m_search_->isCanonicalMatch) {
            	// *could* use exact match here since extra accents *not* allowed!
            	usearch_handlePreviousCanonical(m_strsrch_, &status);
            } else {
            	usearch_handlePreviousExact(m_strsrch_, &status);
            }

            if (U_FAILURE(status)) {
            	return USEARCH_DONE;
            }

            return m_search_->matchedIndex;
#endif
        }

        // Reached only from the empty-pattern branch above.
        return m_search_->matchedIndex;
    }
    return USEARCH_DONE;
}
506
507U_NAMESPACE_END
508
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
510