17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 3f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert * Copyright (C) 1996-2015, International Business Machines Corporation and * 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. * 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.CharacterIterator; 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>SearchIterator</tt> is an abstract base class that provides 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * methods to search for a pattern within a text string. Instances of 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>SearchIterator</tt> maintain a current position and scan over the 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * target text, returning the indices the pattern is matched and the length 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of each match. 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>SearchIterator</tt> defines a protocol for text searching. 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Subclasses provide concrete implementations of various search algorithms. 
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * For example, <tt>StringSearch</tt> implements language-sensitive pattern 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * matching based on the comparison rules defined in a 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>RuleBasedCollator</tt> object. 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Other options for searching include using a BreakIterator to restrict 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the points at which matches are detected. 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>SearchIterator</tt> provides an API that is similar to that of 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * other text iteration classes such as <tt>BreakIterator</tt>. Using 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * this class, it is easy to scan through text looking for all occurrences of 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a given pattern. The following example uses a <tt>StringSearch</tt> 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * object to find all instances of "fox" in the target string. Any other 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * subclass of <tt>SearchIterator</tt> can be used in an identical 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * manner. 
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre><code> 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * String target = "The quick brown fox jumped over the lazy fox"; 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * String pattern = "fox"; 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * SearchIterator iter = new StringSearch(pattern, target); 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * for (int pos = iter.first(); pos != SearchIterator.DONE; 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * pos = iter.next()) { 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * System.out.println("Found match at " + pos + 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * ", length is " + iter.getMatchLength()); 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * } 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </code></pre> 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Laura Werner, synwee 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see BreakIterator 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see RuleBasedCollator 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic abstract class SearchIterator 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert{ 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The BreakIterator to define the boundaries of a logical match. 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This value can be a null. 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * See class documentation for more information. 
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #setBreakIterator(BreakIterator) 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getBreakIterator 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see BreakIterator 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected BreakIterator breakIterator; 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Target text for searching. 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #setTarget(CharacterIterator) 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getTarget 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected CharacterIterator targetText; 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Length of the most current match in target text. 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Value 0 is the default value. 
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #setMatchLength 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getMatchLength 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected int matchLength; 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Java port of ICU4C struct USearch (usrchimp.h) 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Note: 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * ICU4J already exposed some protected members such as 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * targetText, breakIterator and matchedLength as a part of stable 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * APIs. In ICU4C, they are exposed through USearch struct, 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * although USearch struct itself is internal API. 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This class was created for making ICU4J code parallel to 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * ICU4C implementation. ICU4J implementation access member 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * fields like C struct (e.g. search_.isOverlap_) mostly, except 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * fields already exposed as protected member (e.g. search_.text()). 
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert final class Search { 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharacterIterator text() { 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return SearchIterator.this.targetText; 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert void setTarget(CharacterIterator text) { 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert SearchIterator.this.targetText = text; 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** Flag to indicate if overlapping search is to be done. 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert E.g. looking for "aa" in "aaa" will yield matches at offset 0 and 1. 
*/ 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean isOverlap_; 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean isCanonicalMatch_; 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ElementComparisonType elementComparisonType_; 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert BreakIterator internalBreakIter_; 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert BreakIterator breakIter() { 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return SearchIterator.this.breakIterator; 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert void setBreakIter(BreakIterator breakIter) { 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert SearchIterator.this.breakIterator = breakIter; 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int matchedIndex_; 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int matchedLength() { 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return SearchIterator.this.matchLength; 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert void setMatchedLength(int matchedLength) { 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert SearchIterator.this.matchLength = matchedLength; 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik 
Roubert 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** Flag indicates if we are doing a forwards search */ 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean isForwardSearching_; 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** Flag indicates if we are at the start of a string search. 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert This indicates that we are in forward search and at the start of m_text. */ 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean reset_; 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Convenient methods for accessing begin/end index of the 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // target text. These are ICU4J only and are not data fields. 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int beginIndex() { 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (targetText == null) { 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return targetText.getBeginIndex(); 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int endIndex() { 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (targetText == null) { 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return targetText.getEndIndex(); 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Search search_ = new Search(); 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // public data members ------------------------------------------------- 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * DONE is returned by previous() and next() after all valid matches have 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * been returned, and by first() and last() if there are no matches at all. 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #previous 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #next 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final int DONE = -1; 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // public methods ----------------------------------------------------- 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // public setters ----------------------------------------------------- 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Sets the position in the target text at which the next search will start. 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This method clears any previous match. 
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </p> 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param position position from which to start the next search 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @exception IndexOutOfBoundsException thrown if argument position is out 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * of the target text range. 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getIndex 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.8 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void setIndex(int position) { 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (position < search_.beginIndex() 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert || position > search_.endIndex()) { 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IndexOutOfBoundsException( 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "setIndex(int) expected position to be between " + 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.beginIndex() + " and " + search_.endIndex()); 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.reset_ = false; 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.setMatchedLength(0); 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.matchedIndex_ = DONE; 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Determines whether overlapping matches are returned. See the class 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * documentation for more information about overlapping matches. 
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The default setting of this property is false 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param allowOverlap flag indicator if overlapping matches are allowed 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #isOverlapping 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.8 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void setOverlapping(boolean allowOverlap) { 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.isOverlap_ = allowOverlap; 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Set the BreakIterator that will be used to restrict the points 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * at which matches are detected. 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param breakiter A BreakIterator that will be used to restrict the 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * points at which matches are detected. If a match is 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * found, but the match's start or end index is not a 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * boundary as determined by the {@link BreakIterator}, 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the match will be rejected and another will be searched 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * for. 
If this parameter is <tt>null</tt>, no break 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * detection is attempted. 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see BreakIterator 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void setBreakIterator(BreakIterator breakiter) { 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.setBreakIter(breakiter); 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.breakIter() != null) { 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Create a clone of CharacterItearator, so it won't 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // affect the position currently held by search_.text() 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.text() != null) { 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.breakIter().setText((CharacterIterator)search_.text().clone()); 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Set the target text to be searched. Text iteration will then begin at 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the start of the text string. This method is useful if you want to 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * reuse an iterator to search within a different body of text. 
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param text new text iterator to look for match, 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @exception IllegalArgumentException thrown when text is null or has 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 0 length 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getTarget 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.4 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void setTarget(CharacterIterator text) 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (text == null || text.getEndIndex() == text.getIndex()) { 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException("Illegal null or empty text"); 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.setIndex(text.getBeginIndex()); 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.setTarget(text); 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.matchedIndex_ = DONE; 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.setMatchedLength(0); 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.reset_ = true; 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.isForwardSearching_ = true; 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.breakIter() != null) { 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Create a clone of CharacterItearator, so it won't 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // affect the position currently held by search_.text() 
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.breakIter().setText((CharacterIterator)text.clone()); 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.internalBreakIter_ != null) { 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.internalBreakIter_.setText((CharacterIterator)text.clone()); 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //TODO: We may add APIs below to match ICU4C APIs 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // setCanonicalMatch 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // public getters ---------------------------------------------------- 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the index to the match in the text string that was searched. 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This call returns a valid result only after a successful call to 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}. 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Just after construction, or after a searching method returns 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link #DONE}, this method will return {@link #DONE}. 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Use {@link #getMatchLength} to get the matched string length. 
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return index of a substring within the text string that is being 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * searched. 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #first 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #next 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #previous 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #last 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getMatchStart() { 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return search_.matchedIndex_; 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Return the current index in the text being searched. 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * If the iteration has gone past the end of the text 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (or past the beginning for a backwards search), {@link #DONE} 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * is returned. 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return current index in the text being searched. 
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.8 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public abstract int getIndex(); 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the length of text in the string which matches the search 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * pattern. This call returns a valid result only after a successful call 3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * to {@link #first}, {@link #next}, {@link #previous}, or {@link #last}. 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Just after construction, or after a searching method returns 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link #DONE}, this method will return 0. 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The length of the match in the target text, or 0 if there 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * is no match currently. 
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #first 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #next 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #previous 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #last 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getMatchLength() { 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return search_.matchedLength(); 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the BreakIterator that is used to restrict the indexes at which 3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * matches are detected. This will be the same object that was passed to 3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the constructor or to {@link #setBreakIterator}. 3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * If the {@link BreakIterator} has not been set, <tt>null</tt> will be returned. 3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * See {@link #setBreakIterator} for more information. 
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the BreakIterator set to restrict logic matches 3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #setBreakIterator 3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see BreakIterator 3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public BreakIterator getBreakIterator() { 3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return search_.breakIter(); 3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Return the string text to be searched. 3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return text string to be searched. 3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public CharacterIterator getTarget() { 3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return search_.text(); 3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the text that was matched by the most recent call to 3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}. 3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * If the iterator is not pointing at a valid match (e.g. 
just after 3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * construction or after {@link #DONE} has been returned, 3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * returns an empty string. 3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the substring in the target test of the most recent match, 3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * or null if there is no match currently. 3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #first 3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #next 3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #previous 3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #last 3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String getMatchedText() { 3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.matchedLength() > 0) { 3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limit = search_.matchedIndex_ + search_.matchedLength(); 3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder result = new StringBuilder(search_.matchedLength()); 3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharacterIterator it = search_.text(); 3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert it.setIndex(search_.matchedIndex_); 3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (it.getIndex() < limit) { 3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append(it.current()); 3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert it.next(); 3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert it.setIndex(search_.matchedIndex_); 
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.toString(); 3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return null; 3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // miscellaneous public methods ----------------------------------------- 3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the index of the next point at which the text matches the 3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * search pattern, starting from the current position 3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The iterator is adjusted so that its current index (as returned by 3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link #getIndex}) is the match position if one was found. 3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * If a match is not found, {@link #DONE} will be returned and 3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the iterator will be adjusted to a position after the end of the text 3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * string. 3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The index of the next match after the current position, 3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * or {@link #DONE} if there are no more matches. 
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getIndex 3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int next() { 3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int index = getIndex(); // offset = getOffset() in ICU4C 4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int matchindex = search_.matchedIndex_; 4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int matchlength = search_.matchedLength(); 4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.reset_ = false; 4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.isForwardSearching_) { 4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int endIdx = search_.endIndex(); 4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (index == endIdx || matchindex == endIdx || 4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert (matchindex != DONE && 4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert matchindex + matchlength >= endIdx)) { 4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setMatchNotFound(); 4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return DONE; 4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // switching direction. 4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if matchedIndex == DONE, it means that either a 4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // setIndex (setOffset in C) has been called or that previous ran off the text 4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // string. the iterator would have been set to offset 0 if a 4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // match is not found. 
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.isForwardSearching_ = true; 4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.matchedIndex_ != DONE) { 4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // there's no need to set the collation element iterator 4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the next call to next will set the offset. 4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return matchindex; 4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (matchlength > 0) { 4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if matchlength is 0 we are at the start of the iteration 4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.isOverlap_) { 4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index++; 4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index += matchlength; 4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return handleNext(index); 4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the index of the previous point at which the string text 4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * matches the search pattern, starting at the current position. 
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The iterator is adjusted so that its current index (as returned by 4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link #getIndex}) is the match position if one was found. 4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * If a match is not found, {@link #DONE} will be returned and 4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the iterator will be adjusted to the index {@link #DONE}. 4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The index of the previous match before the current position, 4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * or {@link #DONE} if there are no more matches. 4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getIndex 4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int previous() { 4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int index; // offset in ICU4C 4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.reset_) { 4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index = search_.endIndex(); // m_search_->textLength in ICU4C 4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.isForwardSearching_ = false; 4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.reset_ = false; 4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setIndex(index); 4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index = getIndex(); 4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int matchindex = search_.matchedIndex_; 
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.isForwardSearching_) { 4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // switching direction. 4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if matchedIndex == DONE, it means that either a 4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // setIndex (setOffset in C) has been called or that next ran off the text 4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // string. the iterator would have been set to offset textLength if 4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // a match is not found. 4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.isForwardSearching_ = false; 4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (matchindex != DONE) { 4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return matchindex; 4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int startIdx = search_.beginIndex(); 4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (index == startIdx || matchindex == startIdx) { 4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // not enough characters to match 4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setMatchNotFound(); 4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return DONE; 4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (matchindex != DONE) { 4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.isOverlap_) { 4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert matchindex += search_.matchedLength() - 2; 4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return handlePrevious(matchindex); 4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return handlePrevious(index); 4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Return true if the overlapping property has been set. 4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * See {@link #setOverlapping(boolean)} for more information. 4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #setOverlapping 4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return true if the overlapping property has been set, false otherwise 4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.8 4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean isOverlapping() { 5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return search_.isOverlap_; 5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //TODO: We may add APIs below to match ICU4C APIs 5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // isCanonicalMatch 5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Resets the iteration. 
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Search will begin at the start of the text string if a forward 5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * iteration is initiated before a backwards iteration. Otherwise if a 5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * backwards iteration is initiated before a forwards iteration, the 5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * search will begin at the end of the text string. 5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void reset() { 5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setMatchNotFound(); 5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setIndex(search_.beginIndex()); 5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.isOverlap_ = false; 5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.isCanonicalMatch_ = false; 5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON; 5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.isForwardSearching_ = true; 5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.reset_ = true; 5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the first index at which the string text matches the search 5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * pattern. 
The iterator is adjusted so that its current index (as 5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * returned by {@link #getIndex()}) is the match position if one 5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * was found. 5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * If a match is not found, {@link #DONE} will be returned and 5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the iterator will be adjusted to the index {@link #DONE}. 5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The character index of the first match, or 5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link #DONE} if there are no matches. 5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getIndex 5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final int first() { 5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int startIdx = search_.beginIndex(); 5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setIndex(startIdx); 5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return handleNext(startIdx); 5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the first index equal or greater than <tt>position</tt> at which the 5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * string text matches the search pattern. 
The iterator is adjusted so 5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * that its current index (as returned by {@link #getIndex()}) is the 5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * match position if one was found. 5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * If a match is not found, {@link #DONE} will be returned and the 5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * iterator will be adjusted to the index {@link #DONE}. 5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param position where search if to start from. 5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The character index of the first match following 5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>position</tt>, or {@link #DONE} if there are no matches. 5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @throws IndexOutOfBoundsException If position is less than or greater 5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * than the text range for searching. 5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getIndex 5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final int following(int position) { 5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setIndex(position); 5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return handleNext(position); 5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the last index in the target text at which it matches the 5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * search pattern. 
The iterator is adjusted so that its current index 5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (as returned by {@link #getIndex}) is the match position if one was 5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * found. 5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * If a match is not found, {@link #DONE} will be returned and 5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the iterator will be adjusted to the index {@link #DONE}. 5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The index of the first match, or {@link #DONE} if 5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * there are no matches. 5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getIndex 5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final int last() { 5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int endIdx = search_.endIndex(); 5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setIndex(endIdx); 5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return handlePrevious(endIdx); 5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the first index less than <tt>position</tt> at which the string 5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * text matches the search pattern. The iterator is adjusted so that its 5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * current index (as returned by {@link #getIndex}) is the match 5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * position if one was found. 
If a match is not found, 5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@link #DONE} will be returned and the iterator will be 5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * adjusted to the index {@link #DONE} 5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * When the overlapping option ({@link #isOverlapping}) is off, the last index of the 5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * result match is always less than <tt>position</tt>. 5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * When the overlapping option is on, the result match may span across 5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>position</tt>. 5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param position where search is to start from. 6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The character index of the first match preceding 6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tt>position</tt>, or {@link #DONE} if there are 6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * no matches. 
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @throws IndexOutOfBoundsException If position is less than or greater than 6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the text range for searching 6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getIndex 6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public final int preceding(int position) { 6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setIndex(position); 6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return handlePrevious(position); 6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // protected constructor ---------------------------------------------- 6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Protected constructor for use by subclasses. 6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Initializes the iterator with the argument target text for searching 6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and sets the BreakIterator. 6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * See class documentation for more details on the use of the target text 6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and {@link BreakIterator}. 6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param target The target text to be searched. 
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param breaker A {@link BreakIterator} that is used to determine the 6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * boundaries of a logical match. This argument can be null. 6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @exception IllegalArgumentException thrown when argument target is null, 6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * or of length 0 6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see BreakIterator 6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected SearchIterator(CharacterIterator target, BreakIterator breaker) 6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (target == null 6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert || (target.getEndIndex() - target.getBeginIndex()) == 0) { 6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException( 6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "Illegal argument target. 
" + 6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert " Argument can not be null or of length 0"); 6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.setTarget(target); 6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.setBreakIter(breaker); 6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (search_.breakIter() != null) { 6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.breakIter().setText((CharacterIterator)target.clone()); 6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.isOverlap_ = false; 6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.isCanonicalMatch_ = false; 6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.elementComparisonType_ = ElementComparisonType.STANDARD_ELEMENT_COMPARISON; 6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.isForwardSearching_ = true; 6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.reset_ = true; 6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.matchedIndex_ = DONE; 6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.setMatchedLength(0); 6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // protected methods -------------------------------------------------- 6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Sets the length of the most recent match in the target text. 
* Subclasses' handleNext() and handlePrevious() methods should call this
     * after they find a match in the target text.
     * The value is stored in the shared search state.
     *
     * @param length new length to set
     * @see #handleNext
     * @see #handlePrevious
     * @stable ICU 2.0
     */
    protected void setMatchLength(int length)
    {
        search_.setMatchedLength(length);
    }

    /**
     * Abstract method which subclasses override to provide the mechanism
     * for finding the next match in the target text. This allows different
     * subclasses to provide different search algorithms.
* <p>
     * If a match is found, the implementation should return the index at
     * which the match starts and should call
     * {@link #setMatchLength} with the number of characters
     * in the target text that make up the match. If no match is found, the
     * method should return {@link #DONE}.
     * <p>
     * This method is invoked by {@link #next}, {@link #first} and
     * {@link #following(int)}.
     *
     * @param start The index in the target text at which the search
     *              should start.
     * @return the index at which the match starts, or {@link #DONE} if no
     *         match is found
     * @see #setMatchLength
     * @stable ICU 2.0
     */
    protected abstract int handleNext(int start);

    /**
     * Abstract method which subclasses override to provide the mechanism for
     * finding the previous match in the target text. This allows different
     * subclasses to provide different search algorithms.
* <p>
     * If a match is found, the implementation should return the index at
     * which the match starts and should call
     * {@link #setMatchLength} with the number of characters
     * in the target text that make up the match. If no match is found, the
     * method should return {@link #DONE}.
     * <p>
     * This method is invoked by {@link #previous}, {@link #last} and
     * {@link #preceding(int)}.
     *
     * @param startAt The index in the target text at which the search
     *                should start.
     * @return the index at which the match starts, or {@link #DONE} if no
     *         match is found
     * @see #setMatchLength
     * @stable ICU 2.0
     */
    protected abstract int handlePrevious(int startAt);

    /**
     * @internal
     * @deprecated This API is ICU internal only.
*/
    @Deprecated
    //TODO: This protected method is @stable 2.0 in ICU4C
    protected void setMatchNotFound() {
        // Record "no current match": the matched index becomes DONE and the
        // matched length becomes 0.
        search_.matchedIndex_ = DONE;
        search_.setMatchedLength(0);
    }

    /**
     * Option to control how collation elements are compared.
     * The default value will be {@link #STANDARD_ELEMENT_COMPARISON}.
     * <p>
     * PATTERN_BASE_WEIGHT_IS_WILDCARD supports "asymmetric search" as described in
     * <a href="http://www.unicode.org/reports/tr10/#Asymmetric_Search">
     * UTS #10 Unicode Collation Algorithm</a>, while ANY_BASE_WEIGHT_IS_WILDCARD
     * supports a related option in which "unmarked" characters in either the
     * pattern or the searched text are treated as wildcards that match marked or
     * unmarked versions of the same character.
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #setElementComparisonType(ElementComparisonType) 7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getElementComparisonType() 735f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert * @stable ICU 53 7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public enum ElementComparisonType { 7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Standard collation element comparison at the specified collator strength. 7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 741f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert * @stable ICU 53 7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert STANDARD_ELEMENT_COMPARISON, 7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Collation element comparison is modified to effectively provide behavior 7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * between the specified strength and strength - 1. 7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Collation elements in the pattern that have the base weight for the specified 7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * strength are treated as "wildcards" that match an element with any other 7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * weight at that collation level in the searched text. 
For example, with a 7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * secondary-strength English collator, a plain 'e' in the pattern will match 7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a plain e or an e with any diacritic in the searched text, but an e with 7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * diacritic in the pattern will only match an e with the same diacritic in 7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the searched text. 7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 756f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert * @stable ICU 53 7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert PATTERN_BASE_WEIGHT_IS_WILDCARD, 7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Collation element comparison is modified to effectively provide behavior 7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * between the specified strength and strength - 1. 7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Collation elements in either the pattern or the searched text that have the 7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * base weight for the specified strength are treated as "wildcards" that match 7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * an element with any other weight at that collation level. 
For example, with 7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a secondary-strength English collator, a plain 'e' in the pattern will match 7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a plain e or an e with any diacritic in the searched text, but an e with 7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * diacritic in the pattern will only match an e with the same diacritic or a 7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * plain e in the searched text. 7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 772f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert * @stable ICU 53 7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ANY_BASE_WEIGHT_IS_WILDCARD 7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Sets the collation element comparison type. 7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The default comparison type is {@link ElementComparisonType#STANDARD_ELEMENT_COMPARISON}. 
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see ElementComparisonType 7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getElementComparisonType() 784f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert * @stable ICU 53 7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void setElementComparisonType(ElementComparisonType type) { 7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert search_.elementComparisonType_ = type; 7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the collation element comparison type. 7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see ElementComparisonType 7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #setElementComparisonType(ElementComparisonType) 795f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert * @stable ICU 53 7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public ElementComparisonType getElementComparisonType() { 7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return search_.elementComparisonType_; 7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 801