17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 1996-2014, International Business Machines Corporation and * 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. * 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>UnicodeFilter</code> defines a protocol for selecting a 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * subset of the full range (U+0000 to U+FFFF) of Unicode characters. 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Currently, filters are used in conjunction with classes like 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <a href="Transliterator.html" title="class in com.ibm.icu.text"><code>Transliterator</code></a> 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * to only process selected characters through a 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * transformation. 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic abstract class UnicodeFilter implements UnicodeMatcher { 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns <tt>true</tt> for characters that are in the selected 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * subset. In other words, if a character is <b>to be 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * filtered</b>, then <tt>contains()</tt> returns 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <b><tt>false</tt></b>. 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public abstract boolean contains(int c); 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Default implementation of UnicodeMatcher::matches() for Unicode 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * filters. Matches a single 16-bit code unit at offset. 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 2.0 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int matches(Replaceable text, 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int[] offset, 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limit, 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean incremental) { 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c; 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (offset[0] < limit && 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert contains(c = text.char32At(offset[0]))) { 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offset[0] += UTF16.getCharCount(c); 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return U_MATCH; 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (offset[0] > limit && contains(text.char32At(offset[0]))) { 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Backup offset by 1, unless the preceding character is a 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // surrogate pair -- then backup by 2 (keep offset pointing at 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the lead surrogate). 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --offset[0]; 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (offset[0] >= 0) { 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1; 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return U_MATCH; 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (incremental && offset[0] == limit) { 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return U_PARTIAL_MATCH; 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return U_MISMATCH; 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO Remove this when the JDK property implements MemberDoc.isSynthetic 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (This should not be here; it is declared to make CheckTags 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * happy. Java inserts a synthetic constructor and CheckTags 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * can't tell that it's synthetic.) 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @internal 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @deprecated This API is ICU internal only. 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Deprecated 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected UnicodeFilter() {} 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 72