1/*
2 *******************************************************************************
3 * Copyright (C) 1996-2014, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7package com.ibm.icu.text;
8
9/**
10 * <code>UnicodeFilter</code> defines a protocol for selecting a
11 * subset of the full range (U+0000 to U+FFFF) of Unicode characters.
12 * Currently, filters are used in conjunction with classes like
13 * <a href="Transliterator.html" title="class in com.ibm.icu.text"><code>Transliterator</code></a>
14 * to only process selected characters through a
15 * transformation.
16 * @stable ICU 2.0
17 */
18public abstract class UnicodeFilter implements UnicodeMatcher {
19
20    /**
21     * Returns <tt>true</tt> for characters that are in the selected
22     * subset.  In other words, if a character is <b>to be
23     * filtered</b>, then <tt>contains()</tt> returns
24     * <b><tt>false</tt></b>.
25     * @stable ICU 2.0
26     */
27    public abstract boolean contains(int c);
28
29    /**
30     * Default implementation of UnicodeMatcher::matches() for Unicode
31     * filters.  Matches a single 16-bit code unit at offset.
32     * @stable ICU 2.0
33     */
34    public int matches(Replaceable text,
35                       int[] offset,
36                       int limit,
37                       boolean incremental) {
38        int c;
39        if (offset[0] < limit &&
40            contains(c = text.char32At(offset[0]))) {
41            offset[0] += UTF16.getCharCount(c);
42            return U_MATCH;
43        }
44        if (offset[0] > limit && contains(text.char32At(offset[0]))) {
45            // Backup offset by 1, unless the preceding character is a
46            // surrogate pair -- then backup by 2 (keep offset pointing at
47            // the lead surrogate).
48            --offset[0];
49            if (offset[0] >= 0) {
50                offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1;
51            }
52            return U_MATCH;
53        }
54        if (incremental && offset[0] == limit) {
55            return U_PARTIAL_MATCH;
56        }
57        return U_MISMATCH;
58    }
59
60    // TODO Remove this when the JDK property implements MemberDoc.isSynthetic
61    /**
62     * (This should not be here; it is declared to make CheckTags
63     * happy.  Java inserts a synthetic constructor and CheckTags
64     * can't tell that it's synthetic.)
65     *
66     * @internal
67     * @deprecated This API is ICU internal only.
68     */
69    @Deprecated
70    protected UnicodeFilter() {}
71}
72