17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2014, International Business Machines Corporation and 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet.SpanCondition; 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.OutputInt; 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A helper class used to count, replace, and trim CharSequences based on UnicodeSet matches. 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * An instance is immutable (and thus thread-safe) iff the source UnicodeSet is frozen. 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p><b>Note:</b> The counting, deletion, and replacement depend on alternating a {@link SpanCondition} with 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * its inverse. That is, the code spans, then spans for the inverse, then spans, and so on. 
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * For the inverse, the following mapping is used:</p> 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <ul> 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>{@link UnicodeSet.SpanCondition#SIMPLE} → {@link UnicodeSet.SpanCondition#NOT_CONTAINED}</li> 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>{@link UnicodeSet.SpanCondition#CONTAINED} → {@link UnicodeSet.SpanCondition#NOT_CONTAINED}</li> 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>{@link UnicodeSet.SpanCondition#NOT_CONTAINED} → {@link UnicodeSet.SpanCondition#SIMPLE}</li> 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </ul> 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * These are actually not complete inverses. However, the alternating works because there are no gaps. 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * For example, with [a{ab}{bc}], you get the following behavior when scanning forward: 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <table border="1"> 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>SIMPLE</th><td>xxx[ab]cyyy</td></tr> 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>CONTAINED</th><td>xxx[abc]yyy</td></tr> 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>NOT_CONTAINED</th><td>[xxx]ab[cyyy]</td></tr> 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </table> 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>So here is what happens when you alternate: 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p> 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <table border="1"> 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>start</th><td>|xxxabcyyy</td></tr> 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 
<tr><th>NOT_CONTAINED</th><td>xxx|abcyyy</td></tr> 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>CONTAINED</th><td>xxxabc|yyy</td></tr> 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>NOT_CONTAINED</th><td>xxxabcyyy|</td></tr> 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </table> 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </p>The entire string is traversed. 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class UnicodeSetSpanner { 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private final UnicodeSet unicodeSet; 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Create a spanner from a UnicodeSet. For speed and safety, the UnicodeSet should be frozen. However, this class 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * can be used with a non-frozen version to avoid the cost of freezing. 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param source 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the original UnicodeSet 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public UnicodeSetSpanner(UnicodeSet source) { 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert unicodeSet = source; 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the UnicodeSet used for processing. It is frozen iff the original was. 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the construction set. 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public UnicodeSet getUnicodeSet() { 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return unicodeSet; 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean equals(Object other) { 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return other instanceof UnicodeSetSpanner && unicodeSet.equals(((UnicodeSetSpanner) other).unicodeSet); 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int hashCode() { 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return unicodeSet.hashCode(); 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Options for replaceFrom and countIn to control how to treat each matched span. 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * It is similar to whether one is replacing [abc] by x, or [abc]* by x. 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 
/**
 * Options for replaceFrom and countIn to control how to treat each matched span.
 * It is similar to whether one is replacing [abc] by x, or [abc]* by x.
 *
 * @draft ICU 54
 * @provisional This is a draft API and might change in a future release of ICU.
 */
public enum CountMethod {
    /**
     * Collapse spans. That is, modify/count the entire matching span as a single item, instead of separate
     * set elements.
     *
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
    WHOLE_SPAN,
    /**
     * Use the smallest number of elements in the spanned range for counting and modification,
     * based on the {@link UnicodeSet.SpanCondition}.
     * If the set has no strings, this will be the same as the number of spanned code points.
     * <p>For example, in the string "abab" with SpanCondition.SIMPLE:
     * <ul>
     * <li>spanning with [ab] will count four MIN_ELEMENTS.</li>
     * <li>spanning with [{ab}] will count two MIN_ELEMENTS.</li>
     * <li>spanning with [ab{ab}] will also count two MIN_ELEMENTS.</li>
     * </ul>
     *
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
    MIN_ELEMENTS,
    // Note: could in the future have an additional option MAX_ELEMENTS
}

1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sequence 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the sequence to count characters in 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the count. Zero if there are none. 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int countIn(CharSequence sequence) { 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return countIn(sequence, CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE); 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the number of matching characters found in a character sequence, using SpanCondition.SIMPLE. 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions. 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sequence 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the sequence to count characters in 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param countMethod 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * whether to treat an entire span as a match, or individual elements as matches 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the count. Zero if there are none. 
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int countIn(CharSequence sequence, CountMethod countMethod) { 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return countIn(sequence, countMethod, SpanCondition.SIMPLE); 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns the number of matching characters found in a character sequence. 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions. 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sequence 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the sequence to count characters in 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param countMethod 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * whether to treat an entire span as a match, or individual elements as matches 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param spanCondition 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the spanCondition to use. SIMPLE or CONTAINED means only count the elements in the span; 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * NOT_CONTAINED is the reverse. 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <br><b>WARNING: </b> when a UnicodeSet contains strings, there may be unexpected behavior in edge cases. 
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return the count. Zero if there are none. 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int countIn(CharSequence sequence, CountMethod countMethod, SpanCondition spanCondition) { 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int count = 0; 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int start = 0; 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert SpanCondition skipSpan = spanCondition == SpanCondition.NOT_CONTAINED ? SpanCondition.SIMPLE 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert : SpanCondition.NOT_CONTAINED; 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert final int length = sequence.length(); 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert OutputInt spanCount = null; 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (start != length) { 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int endOfSpan = unicodeSet.span(sequence, start, skipSpan); 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (endOfSpan == length) { 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (countMethod == CountMethod.WHOLE_SPAN) { 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert start = unicodeSet.span(sequence, endOfSpan, spanCondition); 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert count += 1; 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (spanCount == null) { 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert spanCount = new OutputInt(); 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert start = unicodeSet.spanAndCount(sequence, endOfSpan, spanCondition, spanCount); 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert count += spanCount.value; 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return count; 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Delete all the matching spans in sequence, using SpanCondition.SIMPLE 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions. 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sequence 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * charsequence to replace matching spans in. 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return modified string. 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String deleteFrom(CharSequence sequence) { 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return replaceFrom(sequence, "", CountMethod.WHOLE_SPAN, SpanCondition.SIMPLE); 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Delete all matching spans in sequence, according to the spanCondition. 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions. 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sequence 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * charsequence to replace matching spans in. 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param spanCondition 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * specify whether to modify the matching spans (CONTAINED or SIMPLE) or the non-matching (NOT_CONTAINED) 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return modified string. 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String deleteFrom(CharSequence sequence, SpanCondition spanCondition) { 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return replaceFrom(sequence, "", CountMethod.WHOLE_SPAN, spanCondition); 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Replace all matching spans in sequence by the replacement, 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * counting by CountMethod.MIN_ELEMENTS using SpanCondition.SIMPLE. 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions. 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sequence 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * charsequence to replace matching spans in. 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param replacement 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * replacement sequence. To delete, use "" 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return modified string. 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String replaceFrom(CharSequence sequence, CharSequence replacement) { 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return replaceFrom(sequence, replacement, CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE); 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Replace all matching spans in sequence by replacement, according to the CountMethod, using SpanCondition.SIMPLE. 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions. 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sequence 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * charsequence to replace matching spans in. 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param replacement 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * replacement sequence. To delete, use "" 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param countMethod 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * whether to treat an entire span as a match, or individual elements as matches 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return modified string. 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod countMethod) { 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return replaceFrom(sequence, replacement, countMethod, SpanCondition.SIMPLE); 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Replace all matching spans in sequence by replacement, according to the countMethod and spanCondition. 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions. 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sequence 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * charsequence to replace matching spans in. 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param replacement 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * replacement sequence. To delete, use "" 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param countMethod 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * whether to treat an entire span as a match, or individual elements as matches 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param spanCondition 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * specify whether to modify the matching spans (CONTAINED or SIMPLE) or the non-matching 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (NOT_CONTAINED) 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return modified string. 
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod countMethod, 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert SpanCondition spanCondition) { 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert SpanCondition copySpan = spanCondition == SpanCondition.NOT_CONTAINED ? SpanCondition.SIMPLE 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert : SpanCondition.NOT_CONTAINED; 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert final boolean remove = replacement.length() == 0; 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder result = new StringBuilder(); 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO, we can optimize this to 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // avoid this allocation unless needed 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert final int length = sequence.length(); 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert OutputInt spanCount = null; 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int endCopy = 0; endCopy != length;) { 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int endModify; 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (countMethod == CountMethod.WHOLE_SPAN) { 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert endModify = unicodeSet.span(sequence, endCopy, spanCondition); 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik 
Roubert if (spanCount == null) { 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert spanCount = new OutputInt(); 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert endModify = unicodeSet.spanAndCount(sequence, endCopy, spanCondition, spanCount); 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (remove || endModify == 0) { 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // do nothing 3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (countMethod == CountMethod.WHOLE_SPAN) { 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append(replacement); 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = spanCount.value; i > 0; --i) { 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append(replacement); 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (endModify == length) { 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert endCopy = unicodeSet.span(sequence, endModify, copySpan); 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.append(sequence.subSequence(endModify, endCopy)); 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.toString(); 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Options for the trim() method 
/**
 * Options for the trim() method
 *
 * @draft ICU 54
 * @provisional This is a draft API and might change in a future release of ICU.
 */
public enum TrimOption {
    /**
     * Trim leading spans.
     *
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
    LEADING,
    /**
     * Trim leading and trailing spans.
     *
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
    BOTH,
    /**
     * Trim trailing spans.
     *
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
    TRAILING;
}

3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public CharSequence trim(CharSequence sequence) { 3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return trim(sequence, TrimOption.BOTH, SpanCondition.SIMPLE); 3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching elements at the start or 3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * end of the string, using the trimOption and SpanCondition.SIMPLE. For example: 3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre> 3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@code 3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * new UnicodeSet("[ab]").trim("abacatbab", TrimOption.LEADING)} 3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre> 3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * ... returns {@code "catbab"}. 
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sequence 3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the sequence to trim 3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param trimOption 3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * LEADING, TRAILING, or BOTH 3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return a subsequence 3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public CharSequence trim(CharSequence sequence, TrimOption trimOption) { 4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return trim(sequence, trimOption, SpanCondition.SIMPLE); 4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching elements at the start or 4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * end of the string, depending on the trimOption and spanCondition. 
For example: 4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre> 4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@code 4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * new UnicodeSet("[ab]").trim("abacatbab", TrimOption.LEADING, SpanCondition.SIMPLE)} 4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre> 4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * ... returns {@code "catbab"}. 4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sequence 4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the sequence to trim 4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param trimOption 4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * LEADING, TRAILING, or BOTH 4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param spanCondition 4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * SIMPLE, CONTAINED or NOT_CONTAINED 4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return a subsequence 4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54 4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU. 
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public CharSequence trim(CharSequence sequence, TrimOption trimOption, SpanCondition spanCondition) { 4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int endLeadContained, startTrailContained; 4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert final int length = sequence.length(); 4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (trimOption != TrimOption.TRAILING) { 4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert endLeadContained = unicodeSet.span(sequence, spanCondition); 4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (endLeadContained == length) { 4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return ""; 4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert endLeadContained = 0; 4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (trimOption != TrimOption.LEADING) { 4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert startTrailContained = unicodeSet.spanBack(sequence, spanCondition); 4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert startTrailContained = length; 4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return endLeadContained == 0 && startTrailContained == length ? sequence : sequence.subSequence( 4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert endLeadContained, startTrailContained); 4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 447