/*
 *******************************************************************************
 * Copyright (C) 1996-2014, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>UnicodeFilter</code> defines a protocol for selecting a
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * subset of the full range (U+0000 to U+FFFF) of Unicode characters.
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Currently, filters are used in conjunction with classes like
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <a href="Transliterator.html" title="class in com.ibm.icu.text"><code>Transliterator</code></a>
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * to only process selected characters through a
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transformation.
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic abstract class UnicodeFilter implements UnicodeMatcher {
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns <tt>true</tt> for characters that are in the selected
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * subset.  In other words, if a character is <b>to be
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * filtered</b>, then <tt>contains()</tt> returns
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <b><tt>false</tt></b>.
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public abstract boolean contains(int c);
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Default implementation of UnicodeMatcher::matches() for Unicode
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * filters.  Matches a single 16-bit code unit at offset.
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @stable ICU 2.0
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int matches(Replaceable text,
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       int[] offset,
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       int limit,
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       boolean incremental) {
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c;
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offset[0] < limit &&
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            contains(c = text.char32At(offset[0]))) {
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset[0] += UTF16.getCharCount(c);
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return U_MATCH;
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (offset[0] > limit && contains(text.char32At(offset[0]))) {
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Backup offset by 1, unless the preceding character is a
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // surrogate pair -- then backup by 2 (keep offset pointing at
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // the lead surrogate).
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            --offset[0];
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (offset[0] >= 0) {
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1;
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return U_MATCH;
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (incremental && offset[0] == limit) {
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return U_PARTIAL_MATCH;
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return U_MISMATCH;
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO Remove this when the JDK property implements MemberDoc.isSynthetic
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (This should not be here; it is declared to make CheckTags
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * happy.  Java inserts a synthetic constructor and CheckTags
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * can't tell that it's synthetic.)
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @internal
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @deprecated This API is ICU internal only.
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Deprecated
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected UnicodeFilter() {}
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
72