17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2014, International Business Machines Corporation and
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet.SpanCondition;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.OutputInt;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A helper class used to count, replace, and trim CharSequences based on UnicodeSet matches.
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * An instance is immutable (and thus thread-safe) iff the source UnicodeSet is frozen.
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p><b>Note:</b> The counting, deletion, and replacement depend on alternating a {@link SpanCondition} with
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * its inverse. That is, the code spans, then spans for the inverse, then spans, and so on.
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * For the inverse, the following mapping is used:</p>
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <ul>
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>{@link UnicodeSet.SpanCondition#SIMPLE} → {@link UnicodeSet.SpanCondition#NOT_CONTAINED}</li>
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>{@link UnicodeSet.SpanCondition#CONTAINED} → {@link UnicodeSet.SpanCondition#NOT_CONTAINED}</li>
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <li>{@link UnicodeSet.SpanCondition#NOT_CONTAINED} → {@link UnicodeSet.SpanCondition#SIMPLE}</li>
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </ul>
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * These are actually not complete inverses. However, the alternating works because there are no gaps.
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * For example, with [a{ab}{bc}], you get the following behavior when scanning forward:
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <table border="1">
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>SIMPLE</th><td>xxx[ab]cyyy</td></tr>
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>CONTAINED</th><td>xxx[abc]yyy</td></tr>
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>NOT_CONTAINED</th><td>[xxx]ab[cyyy]</td></tr>
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </table>
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>So here is what happens when you alternate:
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <table border="1">
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>start</th><td>|xxxabcyyy</td></tr>
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>NOT_CONTAINED</th><td>xxx|abcyyy</td></tr>
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>CONTAINED</th><td>xxxabc|yyy</td></tr>
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <tr><th>NOT_CONTAINED</th><td>xxxabcyyy|</td></tr>
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </table>
 * <p>The entire string is traversed.</p>
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @draft ICU 54
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @provisional This is a draft API and might change in a future release of ICU.
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class UnicodeSetSpanner {
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final UnicodeSet unicodeSet;
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Builds a spanner that does all of its matching against the given UnicodeSet. The set is stored
 * as-is (it is neither copied nor frozen here). For speed and safety it should already be frozen;
 * a non-frozen set is also accepted, which avoids the cost of freezing.
 *
 * @param source
 *            the UnicodeSet to match with
 *
 * @draft ICU 54
 * @provisional This is a draft API and might change in a future release of ICU.
 */
public UnicodeSetSpanner(UnicodeSet source) {
    this.unicodeSet = source;
}
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the UnicodeSet used for processing. It is frozen iff the original was.
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the construction set.
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public UnicodeSet getUnicodeSet() {
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return unicodeSet;
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean equals(Object other) {
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return other instanceof UnicodeSetSpanner && unicodeSet.equals(((UnicodeSetSpanner) other).unicodeSet);
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int hashCode() {
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return unicodeSet.hashCode();
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Selects how replaceFrom and countIn treat each matched span: as one unit, or element by element.
 * The difference is similar to replacing [abc]* by x versus replacing [abc] by x.
 *
 * @draft ICU 54
 * @provisional This is a draft API and might change in a future release of ICU.
 */
public enum CountMethod {
    /**
     * Collapse spans: the entire matching span is modified/counted as a single item, rather than
     * as separate set elements.
     *
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
    WHOLE_SPAN,

    /**
     * Count and modify using the smallest number of elements in the spanned range, as determined
     * by the {@link UnicodeSet.SpanCondition}.
     * For a set without strings this equals the number of spanned code points.
     * <p>For example, in the string "abab" with SpanCondition.SIMPLE:
     * <ul>
     * <li>spanning with [ab] will count four MIN_ELEMENTS.</li>
     * <li>spanning with [{ab}] will count two MIN_ELEMENTS.</li>
     * <li>spanning with [ab{ab}] will also count two MIN_ELEMENTS.</li>
     * </ul>
     *
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
    MIN_ELEMENTS
    // Note: an additional option MAX_ELEMENTS could be added in the future.
}
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the number of matching characters found in a character sequence,
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * counting by CountMethod.MIN_ELEMENTS using SpanCondition.SIMPLE.
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sequence
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            the sequence to count characters in
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the count. Zero if there are none.
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int countIn(CharSequence sequence) {
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return countIn(sequence, CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE);
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the number of matching characters found in a character sequence, using SpanCondition.SIMPLE.
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sequence
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            the sequence to count characters in
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param countMethod
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            whether to treat an entire span as a match, or individual elements as matches
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the count. Zero if there are none.
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int countIn(CharSequence sequence, CountMethod countMethod) {
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return countIn(sequence, countMethod, SpanCondition.SIMPLE);
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the number of matching characters found in a character sequence.
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sequence
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            the sequence to count characters in
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param countMethod
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            whether to treat an entire span as a match, or individual elements as matches
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param spanCondition
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            the spanCondition to use. SIMPLE or CONTAINED means only count the elements in the span;
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            NOT_CONTAINED is the reverse.
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            <br><b>WARNING: </b> when a UnicodeSet contains strings, there may be unexpected behavior in edge cases.
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the count. Zero if there are none.
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int countIn(CharSequence sequence, CountMethod countMethod, SpanCondition spanCondition) {
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int count = 0;
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int start = 0;
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpanCondition skipSpan = spanCondition == SpanCondition.NOT_CONTAINED ? SpanCondition.SIMPLE
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                : SpanCondition.NOT_CONTAINED;
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        final int length = sequence.length();
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        OutputInt spanCount = null;
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (start != length) {
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int endOfSpan = unicodeSet.span(sequence, start, skipSpan);
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (endOfSpan == length) {
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (countMethod == CountMethod.WHOLE_SPAN) {
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                start = unicodeSet.span(sequence, endOfSpan, spanCondition);
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                count += 1;
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (spanCount == null) {
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    spanCount = new OutputInt();
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                start = unicodeSet.spanAndCount(sequence, endOfSpan, spanCondition, spanCount);
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                count += spanCount.value;
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return count;
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Delete all the matching spans in sequence, using SpanCondition.SIMPLE
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sequence
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            charsequence to replace matching spans in.
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return modified string.
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String deleteFrom(CharSequence sequence) {
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return replaceFrom(sequence, "", CountMethod.WHOLE_SPAN, SpanCondition.SIMPLE);
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Delete all matching spans in sequence, according to the spanCondition.
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sequence
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            charsequence to replace matching spans in.
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param spanCondition
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            specify whether to modify the matching spans (CONTAINED or SIMPLE) or the non-matching (NOT_CONTAINED)
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return modified string.
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String deleteFrom(CharSequence sequence, SpanCondition spanCondition) {
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return replaceFrom(sequence, "", CountMethod.WHOLE_SPAN, spanCondition);
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Replace all matching spans in sequence by the replacement,
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * counting by CountMethod.MIN_ELEMENTS using SpanCondition.SIMPLE.
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sequence
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            charsequence to replace matching spans in.
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param replacement
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            replacement sequence. To delete, use ""
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return modified string.
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String replaceFrom(CharSequence sequence, CharSequence replacement) {
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return replaceFrom(sequence, replacement, CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE);
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Replace all matching spans in sequence by replacement, according to the CountMethod, using SpanCondition.SIMPLE.
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sequence
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            charsequence to replace matching spans in.
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param replacement
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            replacement sequence. To delete, use ""
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param countMethod
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            whether to treat an entire span as a match, or individual elements as matches
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return modified string.
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod countMethod) {
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return replaceFrom(sequence, replacement, countMethod, SpanCondition.SIMPLE);
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Replace all matching spans in sequence by replacement, according to the countMethod and spanCondition.
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The code alternates spans; see the class doc for {@link UnicodeSetSpanner} for a note about boundary conditions.
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sequence
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            charsequence to replace matching spans in.
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param replacement
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            replacement sequence. To delete, use ""
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param countMethod
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            whether to treat an entire span as a match, or individual elements as matches
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param spanCondition
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            specify whether to modify the matching spans (CONTAINED or SIMPLE) or the non-matching
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            (NOT_CONTAINED)
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return modified string.
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String replaceFrom(CharSequence sequence, CharSequence replacement, CountMethod countMethod,
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SpanCondition spanCondition) {
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpanCondition copySpan = spanCondition == SpanCondition.NOT_CONTAINED ? SpanCondition.SIMPLE
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                : SpanCondition.NOT_CONTAINED;
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        final boolean remove = replacement.length() == 0;
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder result = new StringBuilder();
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO, we can optimize this to
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // avoid this allocation unless needed
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        final int length = sequence.length();
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        OutputInt spanCount = null;
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int endCopy = 0; endCopy != length;) {
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int endModify;
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (countMethod == CountMethod.WHOLE_SPAN) {
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                endModify = unicodeSet.span(sequence, endCopy, spanCondition);
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (spanCount == null) {
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    spanCount = new OutputInt();
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                endModify = unicodeSet.spanAndCount(sequence, endCopy, spanCondition, spanCount);
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (remove || endModify == 0) {
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // do nothing
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (countMethod == CountMethod.WHOLE_SPAN) {
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result.append(replacement);
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for (int i = spanCount.value; i > 0; --i) {
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    result.append(replacement);
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (endModify == length) {
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            endCopy = unicodeSet.span(sequence, endModify, copySpan);
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result.append(sequence.subSequence(endModify, endCopy));
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result.toString();
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Selects which end(s) of a sequence the trim() methods strip.
 *
 * @draft ICU 54
 * @provisional This is a draft API and might change in a future release of ICU.
 */
public enum TrimOption {
    /**
     * Strip matching spans from the start of the sequence only.
     *
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
    LEADING,

    /**
     * Strip matching spans from both the start and the end of the sequence.
     *
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
    BOTH,

    /**
     * Strip matching spans from the end of the sequence only.
     *
     * @draft ICU 54
     * @provisional This is a draft API and might change in a future release of ICU.
     */
    TRAILING
}
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching elements at the start and
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * end of the string, using TrimOption.BOTH and SpanCondition.SIMPLE. For example:
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <pre>
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@code
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   new UnicodeSet("[ab]").trim("abacatbab")}
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </pre>
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * ... returns {@code "cat"}.
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sequence
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            the sequence to trim
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return a subsequence
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public CharSequence trim(CharSequence sequence) {
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return trim(sequence, TrimOption.BOTH, SpanCondition.SIMPLE);
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching elements at the start or
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * end of the string, using the trimOption and SpanCondition.SIMPLE. For example:
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <pre>
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@code
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   new UnicodeSet("[ab]").trim("abacatbab", TrimOption.LEADING)}
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </pre>
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * ... returns {@code "catbab"}.
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sequence
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            the sequence to trim
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param trimOption
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            LEADING, TRAILING, or BOTH
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return a subsequence
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public CharSequence trim(CharSequence sequence, TrimOption trimOption) {
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return trim(sequence, trimOption, SpanCondition.SIMPLE);
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns a trimmed sequence (using CharSequence.subsequence()), that omits matching elements at the start or
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * end of the string, depending on the trimOption and spanCondition. For example:
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <pre>
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@code
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   new UnicodeSet("[ab]").trim("abacatbab", TrimOption.LEADING, SpanCondition.SIMPLE)}
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </pre>
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * ... returns {@code "catbab"}.
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param sequence
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            the sequence to trim
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param trimOption
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            LEADING, TRAILING, or BOTH
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param spanCondition
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *            SIMPLE, CONTAINED or NOT_CONTAINED
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return a subsequence
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @draft ICU 54
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @provisional This is a draft API and might change in a future release of ICU.
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public CharSequence trim(CharSequence sequence, TrimOption trimOption, SpanCondition spanCondition) {
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int endLeadContained, startTrailContained;
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        final int length = sequence.length();
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (trimOption != TrimOption.TRAILING) {
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            endLeadContained = unicodeSet.span(sequence, spanCondition);
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (endLeadContained == length) {
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return "";
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            endLeadContained = 0;
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (trimOption != TrimOption.LEADING) {
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            startTrailContained = unicodeSet.spanBack(sequence, spanCondition);
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            startTrailContained = length;
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return endLeadContained == 0 && startTrailContained == length ? sequence : sequence.subSequence(
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                endLeadContained, startTrailContained);
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
447