1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 *******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines Corporation and    *
6 * others. All Rights Reserved.                                                *
7 *******************************************************************************
8 */
9package com.ibm.icu.text;
10
11/**
12 * <code>UnicodeFilter</code> defines a protocol for selecting a
13 * subset of the full range (U+0000 to U+FFFF) of Unicode characters.
14 * Currently, filters are used in conjunction with classes like
15 * {@link com.ibm.icu.text.Transliterator}
16 * to only process selected characters through a
17 * transformation.
18 * @stable ICU 2.0
19 */
20@SuppressWarnings("javadoc")    // com.imb.icu.text.Transliterator is in another project
21public abstract class UnicodeFilter implements UnicodeMatcher {
22
23    /**
24     * Returns <tt>true</tt> for characters that are in the selected
25     * subset.  In other words, if a character is <b>to be
26     * filtered</b>, then <tt>contains()</tt> returns
27     * <b><tt>false</tt></b>.
28     * @stable ICU 2.0
29     */
30    public abstract boolean contains(int c);
31
32    /**
33     * Default implementation of UnicodeMatcher::matches() for Unicode
34     * filters.  Matches a single 16-bit code unit at offset.
35     * @stable ICU 2.0
36     */
37    @Override
38    public int matches(Replaceable text,
39                       int[] offset,
40                       int limit,
41                       boolean incremental) {
42        int c;
43        if (offset[0] < limit &&
44            contains(c = text.char32At(offset[0]))) {
45            offset[0] += UTF16.getCharCount(c);
46            return U_MATCH;
47        }
48        if (offset[0] > limit && contains(text.char32At(offset[0]))) {
49            // Backup offset by 1, unless the preceding character is a
50            // surrogate pair -- then backup by 2 (keep offset pointing at
51            // the lead surrogate).
52            --offset[0];
53            if (offset[0] >= 0) {
54                offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1;
55            }
56            return U_MATCH;
57        }
58        if (incremental && offset[0] == limit) {
59            return U_PARTIAL_MATCH;
60        }
61        return U_MISMATCH;
62    }
63
64    // TODO Remove this when the JDK property implements MemberDoc.isSynthetic
65    /**
66     * (This should not be here; it is declared to make CheckTags
67     * happy.  Java inserts a synthetic constructor and CheckTags
68     * can't tell that it's synthetic.)
69     *
70     * @internal
71     * @deprecated This API is ICU internal only.
72     */
73    @Deprecated
74    protected UnicodeFilter() {}
75}
76