/*
 *******************************************************************************
 * Copyright (C) 1996-2015, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * <tt>SearchIterator</tt> is an abstract base class that provides
 * methods to search for a pattern within a text string. Instances of
 * <tt>SearchIterator</tt> maintain a current position and scan over the
 * target text, returning the indices at which the pattern is matched and
 * the length of each match.
 * <p>
 * <tt>SearchIterator</tt> defines a protocol for text searching.
 * Subclasses provide concrete implementations of various search algorithms.
 * For example, <tt>StringSearch</tt> implements language-sensitive pattern
 * matching based on the comparison rules defined in a
 * <tt>RuleBasedCollator</tt> object.
 * <p>
 * Other options for searching include using a BreakIterator to restrict
 * the points at which matches are detected.
 * <p>
 * <tt>SearchIterator</tt> provides an API that is similar to that of
 * other text iteration classes such as <tt>BreakIterator</tt>. Using
 * this class, it is easy to scan through text looking for all occurrences of
 * a given pattern. The following example uses a <tt>StringSearch</tt>
 * object to find all instances of "fox" in the target string. Any other
 * subclass of <tt>SearchIterator</tt> can be used in an identical
 * manner.
 * <pre><code>
 * String target = "The quick brown fox jumped over the lazy fox";
 * String pattern = "fox";
 * SearchIterator iter = new StringSearch(pattern, target);
 * for (int pos = iter.first(); pos != SearchIterator.DONE;
 *         pos = iter.next()) {
 *     System.out.println("Found match at " + pos +
 *             ", length is " + iter.getMatchLength());
 * }
 * </code></pre>
 *
 * @author Laura Werner, synwee
 * @stable ICU 2.0
 * @see BreakIterator
 * @see RuleBasedCollator
 */
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic abstract class SearchIterator
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert{
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The BreakIterator to define the boundaries of a logical match.
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This value can be a null.
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * See class documentation for more information.
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setBreakIterator(BreakIterator)
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getBreakIterator
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see BreakIterator
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected BreakIterator breakIterator;
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Target text for searching.
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setTarget(CharacterIterator)
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getTarget
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected CharacterIterator targetText;
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Length of the most current match in target text.
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Value 0 is the default value.
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setMatchLength
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getMatchLength
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected int matchLength;
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Java port of ICU4C struct USearch (usrchimp.h)
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Note:
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *  ICU4J already exposed some protected members such as
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * targetText, breakIterator and matchedLength as a part of stable
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * APIs. In ICU4C, they are exposed through USearch struct,
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * although USearch struct itself is internal API.
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *  This class was created for making ICU4J code parallel to
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * ICU4C implementation. ICU4J implementation access member
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * fields like C struct (e.g. search_.isOverlap_) mostly, except
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * fields already exposed as protected member (e.g. search_.text()).
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    final class Search {
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharacterIterator text() {
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return SearchIterator.this.targetText;
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        void setTarget(CharacterIterator text) {
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SearchIterator.this.targetText = text;
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /** Flag to indicate if overlapping search is to be done.
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            E.g. looking for "aa" in "aaa" will yield matches at offset 0 and 1. */
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean isOverlap_;
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean isCanonicalMatch_;
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ElementComparisonType elementComparisonType_;
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BreakIterator internalBreakIter_;
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BreakIterator breakIter() {
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return SearchIterator.this.breakIterator;
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        void setBreakIter(BreakIterator breakIter) {
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SearchIterator.this.breakIterator = breakIter;
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int matchedIndex_;
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int matchedLength() {
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return SearchIterator.this.matchLength;
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        void setMatchedLength(int matchedLength) {
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SearchIterator.this.matchLength = matchedLength;
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /** Flag indicates if we are doing a forwards search */
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean isForwardSearching_;
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /** Flag indicates if we are at the start of a string search.
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            This indicates that we are in forward search and at the start of m_text. */
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean reset_;
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Convenient methods for accessing begin/end index of the
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // target text. These are ICU4J only and are not data fields.
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int beginIndex() {
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (targetText == null) {
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return 0;
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return targetText.getBeginIndex();
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int endIndex() {
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (targetText == null) {
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return 0;
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return targetText.getEndIndex();
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    Search search_ = new Search();
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // public data members -------------------------------------------------
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * DONE is returned by previous() and next() after all valid matches have
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * been returned, and by first() and last() if there are no matches at all.
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #previous
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #next
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int DONE = -1;
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // public methods -----------------------------------------------------
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // public setters -----------------------------------------------------
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the position in the target text at which the next search will start.
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This method clears any previous match.
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </p>
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param position position from which to start the next search
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IndexOutOfBoundsException thrown if argument position is out
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            of the target text range.
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getIndex
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setIndex(int position) {
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (position < search_.beginIndex()
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            || position > search_.endIndex()) {
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IndexOutOfBoundsException(
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                "setIndex(int) expected position to be between " +
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                search_.beginIndex() + " and " + search_.endIndex());
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.reset_ = false;
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setMatchedLength(0);
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.matchedIndex_ = DONE;
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Determines whether overlapping matches are returned. See the class
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * documentation for more information about overlapping matches.
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The default setting of this property is false
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param allowOverlap flag indicator if overlapping matches are allowed
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #isOverlapping
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setOverlapping(boolean allowOverlap) {
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isOverlap_ = allowOverlap;
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the BreakIterator that will be used to restrict the points
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * at which matches are detected.
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param breakiter A BreakIterator that will be used to restrict the
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                points at which matches are detected. If a match is
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                found, but the match's start or end index is not a
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                boundary as determined by the {@link BreakIterator},
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                the match will be rejected and another will be searched
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                for. If this parameter is <tt>null</tt>, no break
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                detection is attempted.
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see BreakIterator
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setBreakIterator(BreakIterator breakiter) {
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setBreakIter(breakiter);
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.breakIter() != null) {
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Create a clone of CharacterItearator, so it won't
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // affect the position currently held by search_.text()
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.text() != null) {
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                search_.breakIter().setText((CharacterIterator)search_.text().clone());
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set the target text to be searched. Text iteration will then begin at
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the start of the text string. This method is useful if you want to
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * reuse an iterator to search within a different body of text.
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param text new text iterator to look for match,
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IllegalArgumentException thrown when text is null or has
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *               0 length
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getTarget
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.4
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setTarget(CharacterIterator text)
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (text == null || text.getEndIndex() == text.getIndex()) {
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("Illegal null or empty text");
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        text.setIndex(text.getBeginIndex());
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setTarget(text);
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.matchedIndex_ = DONE;
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setMatchedLength(0);
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.reset_ = true;
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isForwardSearching_ = true;
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.breakIter() != null) {
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Create a clone of CharacterItearator, so it won't
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // affect the position currently held by search_.text()
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.breakIter().setText((CharacterIterator)text.clone());
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.internalBreakIter_ != null) {
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.internalBreakIter_.setText((CharacterIterator)text.clone());
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //TODO: We may add APIs below to match ICU4C APIs
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // setCanonicalMatch
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // public getters ----------------------------------------------------
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Returns the index to the match in the text string that was searched.
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * This call returns a valid result only after a successful call to
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Just after construction, or after a searching method returns
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * {@link #DONE}, this method will return {@link #DONE}.
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * <p>
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Use {@link #getMatchLength} to get the matched string length.
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    *
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @return index of a substring within the text string that is being
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    *         searched.
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @see #first
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @see #next
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @see #previous
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @see #last
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @stable ICU 2.0
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getMatchStart() {
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return search_.matchedIndex_;
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the current index in the text being searched.
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If the iteration has gone past the end of the text
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (or past the beginning for a backwards search), {@link #DONE}
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * is returned.
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return current index in the text being searched.
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public abstract int getIndex();
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the length of text in the string which matches the search
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * pattern. This call returns a valid result only after a successful call
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * to {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Just after construction, or after a searching method returns
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #DONE}, this method will return 0.
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The length of the match in the target text, or 0 if there
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         is no match currently.
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #first
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #next
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #previous
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #last
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getMatchLength() {
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return search_.matchedLength();
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the BreakIterator that is used to restrict the indexes at which
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * matches are detected. This will be the same object that was passed to
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the constructor or to {@link #setBreakIterator}.
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If the {@link BreakIterator} has not been set, <tt>null</tt> will be returned.
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * See {@link #setBreakIterator} for more information.
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the BreakIterator set to restrict logic matches
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setBreakIterator
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see BreakIterator
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public BreakIterator getBreakIterator() {
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return search_.breakIter();
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return the string text to be searched.
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return text string to be searched.
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public CharacterIterator getTarget() {
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return search_.text();
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the text that was matched by the most recent call to
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If the iterator is not pointing at a valid match (e.g. just after
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * construction or after {@link #DONE} has been returned,
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * returns an empty string.
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return  the substring in the target test of the most recent match,
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *          or null if there is no match currently.
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #first
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #next
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #previous
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #last
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String getMatchedText() {
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.matchedLength() > 0) {
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int limit = search_.matchedIndex_ + search_.matchedLength();
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuilder result = new StringBuilder(search_.matchedLength());
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CharacterIterator it = search_.text();
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            it.setIndex(search_.matchedIndex_);
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (it.getIndex() < limit) {
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result.append(it.current());
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                it.next();
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            it.setIndex(search_.matchedIndex_);
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result.toString();
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return null;
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // miscellaneous public methods -----------------------------------------
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the index of the next point at which the text matches the
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * search pattern, starting from the current position
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iterator is adjusted so that its current index (as returned by
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #getIndex}) is the match position if one was found.
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If a match is not found, {@link #DONE} will be returned and
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the iterator will be adjusted to a position after the end of the text
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * string.
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The index of the next match after the current position,
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *          or {@link #DONE} if there are no more matches.
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getIndex
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int next() {
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int index = getIndex(); // offset = getOffset() in ICU4C
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int matchindex = search_.matchedIndex_;
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int matchlength = search_.matchedLength();
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.reset_ = false;
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.isForwardSearching_) {
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int endIdx = search_.endIndex();
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index == endIdx || matchindex == endIdx ||
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (matchindex != DONE &&
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    matchindex + matchlength >= endIdx)) {
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setMatchNotFound();
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return DONE;
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // switching direction.
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if matchedIndex == DONE, it means that either a
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // setIndex (setOffset in C) has been called or that previous ran off the text
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // string. the iterator would have been set to offset 0 if a
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // match is not found.
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.isForwardSearching_ = true;
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.matchedIndex_ != DONE) {
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // there's no need to set the collation element iterator
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // the next call to next will set the offset.
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return matchindex;
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (matchlength > 0) {
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if matchlength is 0 we are at the start of the iteration
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.isOverlap_) {
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index++;
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index += matchlength;
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handleNext(index);
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the index of the previous point at which the string text
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * matches the search pattern, starting at the current position.
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The iterator is adjusted so that its current index (as returned by
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #getIndex}) is the match position if one was found.
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If a match is not found, {@link #DONE} will be returned and
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the iterator will be adjusted to the index {@link #DONE}.
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The index of the previous match before the current position,
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *          or {@link #DONE} if there are no more matches.
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getIndex
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int previous() {
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int index;  // offset in ICU4C
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.reset_) {
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = search_.endIndex();   // m_search_->textLength in ICU4C
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.isForwardSearching_ = false;
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.reset_ = false;
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            setIndex(index);
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = getIndex();
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int matchindex = search_.matchedIndex_;
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.isForwardSearching_) {
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // switching direction.
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if matchedIndex == DONE, it means that either a
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // setIndex (setOffset in C) has been called or that next ran off the text
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // string. the iterator would have been set to offset textLength if
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // a match is not found.
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.isForwardSearching_ = false;
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (matchindex != DONE) {
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return matchindex;
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int startIdx = search_.beginIndex();
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (index == startIdx || matchindex == startIdx) {
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // not enough characters to match
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setMatchNotFound();
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return DONE;
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (matchindex != DONE) {
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (search_.isOverlap_) {
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                matchindex += search_.matchedLength() - 2;
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return handlePrevious(matchindex);
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handlePrevious(index);
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Return true if the overlapping property has been set.
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * See {@link #setOverlapping(boolean)} for more information.
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setOverlapping
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return true if the overlapping property has been set, false otherwise
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.8
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isOverlapping() {
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return search_.isOverlap_;
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //TODO: We may add APIs below to match ICU4C APIs
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // isCanonicalMatch
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Resets the iteration.
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * Search will begin at the start of the text string if a forward
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * iteration is initiated before a backwards iteration. Otherwise if a
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * backwards iteration is initiated before a forwards iteration, the
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * search will begin at the end of the text string.
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    *
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    * @stable ICU 2.0
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    */
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void reset() {
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setMatchNotFound();
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setIndex(search_.beginIndex());
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isOverlap_ = false;
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isCanonicalMatch_ = false;
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isForwardSearching_ = true;
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.reset_ = true;
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the first index at which the string text matches the search
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * pattern. The iterator is adjusted so that its current index (as
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * returned by {@link #getIndex()}) is the match position if one
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * was found.
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If a match is not found, {@link #DONE} will be returned and
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the iterator will be adjusted to the index {@link #DONE}.
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The character index of the first match, or
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         {@link #DONE} if there are no matches.
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getIndex
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int first() {
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int startIdx = search_.beginIndex();
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setIndex(startIdx);
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handleNext(startIdx);
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the first index equal or greater than <tt>position</tt> at which the
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * string text matches the search pattern. The iterator is adjusted so
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * that its current index (as returned by {@link #getIndex()}) is the
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * match position if one was found.
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If a match is not found, {@link #DONE} will be returned and the
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * iterator will be adjusted to the index {@link #DONE}.
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param  position where search if to start from.
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The character index of the first match following
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         <tt>position</tt>, or {@link #DONE} if there are no matches.
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IndexOutOfBoundsException    If position is less than or greater
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *      than the text range for searching.
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getIndex
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int following(int position) {
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setIndex(position);
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handleNext(position);
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the last index in the target text at which it matches the
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * search pattern. The iterator is adjusted so that its current index
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (as returned by {@link #getIndex}) is the match position if one was
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * found.
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If a match is not found, {@link #DONE} will be returned and
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the iterator will be adjusted to the index {@link #DONE}.
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The index of the first match, or {@link #DONE} if
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         there are no matches.
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getIndex
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int last() {
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int endIdx = search_.endIndex();
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setIndex(endIdx);
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handlePrevious(endIdx);
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the first index less than <tt>position</tt> at which the string
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * text matches the search pattern. The iterator is adjusted so that its
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * current index (as returned by {@link #getIndex}) is the match
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * position if one was found. If a match is not found,
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #DONE} will be returned and the iterator will be
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * adjusted to the index {@link #DONE}
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * When the overlapping option ({@link #isOverlapping}) is off, the last index of the
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * result match is always less than <tt>position</tt>.
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * When the overlapping option is on, the result match may span across
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <tt>position</tt>.
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param  position where search is to start from.
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The character index of the first match preceding
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         <tt>position</tt>, or {@link #DONE} if there are
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         no matches.
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @throws IndexOutOfBoundsException If position is less than or greater than
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                                   the text range for searching
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getIndex
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public final int preceding(int position) {
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setIndex(position);
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return handlePrevious(position);
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // protected constructor ----------------------------------------------
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Protected constructor for use by subclasses.
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Initializes the iterator with the argument target text for searching
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and sets the BreakIterator.
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * See class documentation for more details on the use of the target text
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and {@link BreakIterator}.
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param target The target text to be searched.
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param breaker A {@link BreakIterator} that is used to determine the
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                boundaries of a logical match. This argument can be null.
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @exception IllegalArgumentException thrown when argument target is null,
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            or of length 0
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see BreakIterator
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected SearchIterator(CharacterIterator target, BreakIterator breaker)
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (target == null
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            || (target.getEndIndex() - target.getBeginIndex()) == 0) {
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new IllegalArgumentException(
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                   "Illegal argument target. " +
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                   " Argument can not be null or of length 0");
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setTarget(target);
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setBreakIter(breaker);
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (search_.breakIter() != null) {
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            search_.breakIter().setText((CharacterIterator)target.clone());
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isOverlap_ = false;
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isCanonicalMatch_ = false;
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.isForwardSearching_ = true;
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.reset_ = true;
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.matchedIndex_ = DONE;
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setMatchedLength(0);
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // protected methods --------------------------------------------------
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the length of the most recent match in the target text.
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Subclasses' handleNext() and handlePrevious() methods should call this
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * after they find a match in the target text.
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param length new length to set
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #handleNext
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #handlePrevious
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void setMatchLength(int length)
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setMatchedLength(length);
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Abstract method which subclasses override to provide the mechanism
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * for finding the next match in the target text. This allows different
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * subclasses to provide different search algorithms.
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If a match is found, the implementation should return the index at
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * which the match starts and should call
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #setMatchLength} with the number of characters
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * in the target text that make up the match. If no match is found, the
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * method should return {@link #DONE}.
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param start The index in the target text at which the search
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *              should start.
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return index at which the match starts, else if match is not found
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         {@link #DONE} is returned
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setMatchLength
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected abstract int handleNext(int start);
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Abstract method which subclasses override to provide the mechanism for
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * finding the previous match in the target text. This allows different
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * subclasses to provide different search algorithms.
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If a match is found, the implementation should return the index at
6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * which the match starts and should call
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #setMatchLength} with the number of characters
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * in the target text that make up the match. If no match is found, the
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * method should return {@link #DONE}.
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param startAt   The index in the target text at which the search
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *                  should start.
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return index at which the match starts, else if match is not found
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         {@link #DONE} is returned
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setMatchLength
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected abstract int handlePrevious(int startAt);
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //TODO: This protected method is @stable 2.0 in ICU4C
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected void setMatchNotFound() {
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.matchedIndex_ = DONE;
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.setMatchedLength(0);
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Option to control how collation elements are compared.
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The default value will be {@link #STANDARD_ELEMENT_COMPARISON}.
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * PATTERN_BASE_WEIGHT_IS_WILDCARD supports "asymmetric search" as described in
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <a href="http://www.unicode.org/reports/tr10/#Asymmetric_Search">
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * UTS #10 Unicode Collation Algorithm</a>, while ANY_BASE_WEIGHT_IS_WILDCARD
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * supports a related option in which "unmarked" characters in either the
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * pattern or the searched text are treated as wildcards that match marked or
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * unmarked versions of the same character.
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setElementComparisonType(ElementComparisonType)
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getElementComparisonType()
735f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * @stable ICU 53
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public enum ElementComparisonType {
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Standard collation element comparison at the specified collator strength.
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
741f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert         * @stable ICU 53
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        STANDARD_ELEMENT_COMPARISON,
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Collation element comparison is modified to effectively provide behavior
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * between the specified strength and strength - 1.
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * <p>
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Collation elements in the pattern that have the base weight for the specified
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * strength are treated as "wildcards" that match an element with any other
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * weight at that collation level in the searched text. For example, with a
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * secondary-strength English collator, a plain 'e' in the pattern will match
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * a plain e or an e with any diacritic in the searched text, but an e with
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * diacritic in the pattern will only match an e with the same diacritic in
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * the searched text.
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
756f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert         * @stable ICU 53
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        PATTERN_BASE_WEIGHT_IS_WILDCARD,
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Collation element comparison is modified to effectively provide behavior
7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * between the specified strength and strength - 1.
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * <p>
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Collation elements in either the pattern or the searched text that have the
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * base weight for the specified strength are treated as "wildcards" that match
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * an element with any other weight at that collation level. For example, with
7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * a secondary-strength English collator, a plain 'e' in the pattern will match
7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * a plain e or an e with any diacritic in the searched text, but an e with
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * diacritic in the pattern will only match an e with the same diacritic or a
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * plain e in the searched text.
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
772f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert         * @stable ICU 53
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ANY_BASE_WEIGHT_IS_WILDCARD
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sets the collation element comparison type.
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The default comparison type is {@link ElementComparisonType#STANDARD_ELEMENT_COMPARISON}.
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see ElementComparisonType
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #getElementComparisonType()
784f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * @stable ICU 53
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void setElementComparisonType(ElementComparisonType type) {
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        search_.elementComparisonType_ = type;
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the collation element comparison type.
7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see ElementComparisonType
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see #setElementComparisonType(ElementComparisonType)
795f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * @stable ICU 53
7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public ElementComparisonType getElementComparisonType() {
7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return search_.elementComparisonType_;
7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
801