/*
*******************************************************************************
*   Copyright (C) 2009-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*/
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ICUUncheckedIOException;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Normalization filtered by a UnicodeSet.
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Normalizes portions of the text contained in the filter set and leaves
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * portions not contained in the filter set unchanged.
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Filtering is done via UnicodeSet.span(..., UnicodeSet.SpanCondition.SIMPLE).
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This class implements all of (and only) the Normalizer2 API.
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * An instance of this class is unmodifiable/immutable.
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Markus W. Scherer
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class FilteredNormalizer2 extends Normalizer2 {
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Constructs a filtered normalizer wrapping any Normalizer2 instance
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and a filter set.
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Both are aliased and must not be modified or deleted while this object
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * is used.
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The filter set should be frozen; otherwise the performance will suffer greatly.
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param n2 wrapped Normalizer2 instance
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param filterSet UnicodeSet which determines the characters to be normalized
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.4
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public FilteredNormalizer2(Normalizer2 n2, UnicodeSet filterSet) {
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        norm2=n2;
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        set=filterSet;
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.4
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringBuilder normalize(CharSequence src, StringBuilder dest) {
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(dest==src) {
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException();
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dest.setLength(0);
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE);
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return dest;
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.6
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Appendable normalize(CharSequence src, Appendable dest) {
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(dest==src) {
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException();
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE);
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.4
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringBuilder normalizeSecondAndAppend(
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuilder first, CharSequence second) {
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return normalizeSecondAndAppend(first, second, true);
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.4
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringBuilder append(StringBuilder first, CharSequence second) {
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return normalizeSecondAndAppend(first, second, false);
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.6
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String getDecomposition(int c) {
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return set.contains(c) ? norm2.getDecomposition(c) : null;
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 49
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String getRawDecomposition(int c) {
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return set.contains(c) ? norm2.getRawDecomposition(c) : null;
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 49
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int composePair(int a, int b) {
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : -1;
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 49
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getCombiningClass(int c) {
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return set.contains(c) ? norm2.getCombiningClass(c) : 0;
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.4
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isNormalized(CharSequence s) {
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int spanLimit=set.span(s, prevSpanLimit, spanCondition);
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                spanCondition=UnicodeSet.SpanCondition.SIMPLE;
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(!norm2.isNormalized(s.subSequence(prevSpanLimit, spanLimit))) {
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            prevSpanLimit=spanLimit;
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.4
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Normalizer.QuickCheckResult result=Normalizer.YES;
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int spanLimit=set.span(s, prevSpanLimit, spanCondition);
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                spanCondition=UnicodeSet.SpanCondition.SIMPLE;
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Normalizer.QuickCheckResult qcResult=
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    norm2.quickCheck(s.subSequence(prevSpanLimit, spanLimit));
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(qcResult==Normalizer.NO) {
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return qcResult;
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(qcResult==Normalizer.MAYBE) {
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    result=qcResult;
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            prevSpanLimit=spanLimit;
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return result;
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.4
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int spanQuickCheckYes(CharSequence s) {
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE;
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int prevSpanLimit=0; prevSpanLimit<s.length();) {
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int spanLimit=set.span(s, prevSpanLimit, spanCondition);
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                spanCondition=UnicodeSet.SpanCondition.SIMPLE;
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int yesLimit=
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevSpanLimit+
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    norm2.spanQuickCheckYes(s.subSequence(prevSpanLimit, spanLimit));
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(yesLimit<spanLimit) {
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return yesLimit;
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            prevSpanLimit=spanLimit;
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return s.length();
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.4
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean hasBoundaryBefore(int c) {
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return !set.contains(c) || norm2.hasBoundaryBefore(c);
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.4
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean hasBoundaryAfter(int c) {
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return !set.contains(c) || norm2.hasBoundaryAfter(c);
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@inheritDoc}
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 4.4
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isInert(int c) {
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return !set.contains(c) || norm2.isInert(c);
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Internal: No argument checking, and appends to dest.
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Pass as input spanCondition the one that is likely to yield a non-zero
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // span length at the start of src.
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // UnicodeSet.SpanCondition.SIMPLE should be passed in for the start of src
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // and UnicodeSet.SpanCondition.NOT_CONTAINED should be passed in if we continue after
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // an in-filter prefix.
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Appendable normalize(CharSequence src, Appendable dest,
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 UnicodeSet.SpanCondition spanCondition) {
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Don't throw away destination buffer between iterations.
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder tempDest=new StringBuilder();
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(int prevSpanLimit=0; prevSpanLimit<src.length();) {
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int spanLimit=set.span(src, prevSpanLimit, spanCondition);
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int spanLength=spanLimit-prevSpanLimit;
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) {
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(spanLength!=0) {
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        dest.append(src, prevSpanLimit, spanLimit);
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    spanCondition=UnicodeSet.SpanCondition.SIMPLE;
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(spanLength!=0) {
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // Not norm2.normalizeSecondAndAppend() because we do not want
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // to modify the non-filter part of dest.
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        dest.append(norm2.normalize(src.subSequence(prevSpanLimit, spanLimit), tempDest));
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED;
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevSpanLimit=spanLimit;
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch(IOException e) {
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new ICUUncheckedIOException(e);
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return dest;
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second,
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                   boolean doNormalize) {
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(first==second) {
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException();
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(first.length()==0) {
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(doNormalize) {
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return normalize(second, first);
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return first.append(second);
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // merge the in-filter suffix of the first string with the in-filter prefix of the second
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prefixLimit=set.span(second, 0, UnicodeSet.SpanCondition.SIMPLE);
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(prefixLimit!=0) {
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CharSequence prefix=second.subSequence(0, prefixLimit);
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int suffixStart=set.spanBack(first, 0x7fffffff, UnicodeSet.SpanCondition.SIMPLE);
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(suffixStart==0) {
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(doNormalize) {
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    norm2.normalizeSecondAndAppend(first, prefix);
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    norm2.append(first, prefix);
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuilder middle=new StringBuilder(
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        first.subSequence(suffixStart, first.length()));
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(doNormalize) {
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    norm2.normalizeSecondAndAppend(middle, prefix);
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    norm2.append(middle, prefix);
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                first.delete(suffixStart, 0x7fffffff).append(middle);
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(prefixLimit<second.length()) {
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CharSequence rest=second.subSequence(prefixLimit, second.length());
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(doNormalize) {
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                normalize(rest, first, UnicodeSet.SpanCondition.NOT_CONTAINED);
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                first.append(rest);
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return first;
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Normalizer2 norm2;
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private UnicodeSet set;
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert};
303