1/* 2********************************************************************** 3* Copyright (C) 1999-2010, International Business Machines Corporation and others. 4* All Rights Reserved. 5********************************************************************** 6* Date Name Description 7* 11/17/99 aliu Creation. 8********************************************************************** 9*/ 10#ifndef UNIFILT_H 11#define UNIFILT_H 12 13#include "unicode/unifunct.h" 14#include "unicode/unimatch.h" 15 16/** 17 * \file 18 * \brief C++ API: Unicode Filter 19 */ 20 21U_NAMESPACE_BEGIN 22 23/** 24 * U_ETHER is used to represent character values for positions outside 25 * a range. For example, transliterator uses this to represent 26 * characters outside the range contextStart..contextLimit-1. This 27 * allows explicit matching by rules and UnicodeSets of text outside a 28 * defined range. 29 * @stable ICU 3.0 30 */ 31#define U_ETHER ((UChar)0xFFFF) 32 33/** 34 * 35 * <code>UnicodeFilter</code> defines a protocol for selecting a 36 * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. 37 * Currently, filters are used in conjunction with classes like {@link 38 * Transliterator} to only process selected characters through a 39 * transformation. 40 * 41 * <p>Note: UnicodeFilter currently stubs out two pure virtual methods 42 * of its base class, UnicodeMatcher. These methods are toPattern() 43 * and matchesIndexValue(). This is done so that filter classes that 44 * are not actually used as matchers -- specifically, those in the 45 * UnicodeFilterLogic component, and those in tests -- can continue to 46 * work without defining these methods. As long as a filter is not 47 * used in an RBT during real transliteration, these methods will not 48 * be called. However, this breaks the UnicodeMatcher base class 49 * protocol, and it is not a correct solution. 50 * 51 * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter 52 * hierarchy and either redesign it, or simply remove the stubs in 53 * UnicodeFilter and force subclasses to implement the full 54 * UnicodeMatcher protocol. 55 * 56 * @see UnicodeFilterLogic 57 * @stable ICU 2.0 58 */ 59class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { 60 61public: 62 /** 63 * Destructor 64 * @stable ICU 2.0 65 */ 66 virtual ~UnicodeFilter(); 67 68 /** 69 * Returns <tt>true</tt> for characters that are in the selected 70 * subset. In other words, if a character is <b>to be 71 * filtered</b>, then <tt>contains()</tt> returns 72 * <b><tt>false</tt></b>. 73 * @stable ICU 2.0 74 */ 75 virtual UBool contains(UChar32 c) const = 0; 76 77 /** 78 * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer 79 * and return the pointer. 80 * @stable ICU 2.4 81 */ 82 virtual UnicodeMatcher* toMatcher() const; 83 84 /** 85 * Implement UnicodeMatcher API. 86 * @stable ICU 2.4 87 */ 88 virtual UMatchDegree matches(const Replaceable& text, 89 int32_t& offset, 90 int32_t limit, 91 UBool incremental); 92 93 /** 94 * UnicodeFunctor API. Nothing to do. 95 * @stable ICU 2.4 96 */ 97 virtual void setData(const TransliterationRuleData*); 98 99 /** 100 * ICU "poor man's RTTI", returns a UClassID for this class. 101 * 102 * @stable ICU 2.2 103 */ 104 static UClassID U_EXPORT2 getStaticClassID(); 105 106protected: 107 108 /* 109 * Since this class has pure virtual functions, 110 * a constructor can't be used. 111 * @stable ICU 2.0 112 */ 113/* UnicodeFilter();*/ 114}; 115 116/*inline UnicodeFilter::UnicodeFilter() {}*/ 117 118U_NAMESPACE_END 119 120#endif 121