1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html#License 3/* 4 ******************************************************************************* 5 * Copyright (C) 1996-2016, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9package com.ibm.icu.text; 10 11/** 12 * <code>UnicodeFilter</code> defines a protocol for selecting a 13 * subset of the full range (U+0000 to U+FFFF) of Unicode characters. 14 * Currently, filters are used in conjunction with classes like 15 * {@link com.ibm.icu.text.Transliterator} 16 * to only process selected characters through a 17 * transformation. 18 * @stable ICU 2.0 19 */ 20@SuppressWarnings("javadoc") // com.imb.icu.text.Transliterator is in another project 21public abstract class UnicodeFilter implements UnicodeMatcher { 22 23 /** 24 * Returns <tt>true</tt> for characters that are in the selected 25 * subset. In other words, if a character is <b>to be 26 * filtered</b>, then <tt>contains()</tt> returns 27 * <b><tt>false</tt></b>. 28 * @stable ICU 2.0 29 */ 30 public abstract boolean contains(int c); 31 32 /** 33 * Default implementation of UnicodeMatcher::matches() for Unicode 34 * filters. Matches a single 16-bit code unit at offset. 35 * @stable ICU 2.0 36 */ 37 @Override 38 public int matches(Replaceable text, 39 int[] offset, 40 int limit, 41 boolean incremental) { 42 int c; 43 if (offset[0] < limit && 44 contains(c = text.char32At(offset[0]))) { 45 offset[0] += UTF16.getCharCount(c); 46 return U_MATCH; 47 } 48 if (offset[0] > limit && contains(text.char32At(offset[0]))) { 49 // Backup offset by 1, unless the preceding character is a 50 // surrogate pair -- then backup by 2 (keep offset pointing at 51 // the lead surrogate). 52 --offset[0]; 53 if (offset[0] >= 0) { 54 offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1; 55 } 56 return U_MATCH; 57 } 58 if (incremental && offset[0] == limit) { 59 return U_PARTIAL_MATCH; 60 } 61 return U_MISMATCH; 62 } 63 64 // TODO Remove this when the JDK property implements MemberDoc.isSynthetic 65 /** 66 * (This should not be here; it is declared to make CheckTags 67 * happy. Java inserts a synthetic constructor and CheckTags 68 * can't tell that it's synthetic.) 69 * 70 * @internal 71 * @deprecated This API is ICU internal only. 72 */ 73 @Deprecated 74 protected UnicodeFilter() {} 75} 76