1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (c) 2001-2004, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 07/18/01 aliu Creation. 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unifilt.h" 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/rep.h" 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter) 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Define this here due to the lack of another file. 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru It can't be defined in the header */ 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeMatcher::~UnicodeMatcher() {} 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeFilter::~UnicodeFilter() {} 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and return the pointer. 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeMatcher* UnicodeFilter::toMatcher() const { 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (UnicodeMatcher*) this; 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid UnicodeFilter::setData(const TransliterationRuleData*) {} 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Default implementation of UnicodeMatcher::matches() for Unicode 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * filters. Matches a single code point at offset (either one or 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * two 16-bit code units). 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUMatchDegree UnicodeFilter::matches(const Replaceable& text, 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t& offset, 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool incremental) { 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (offset < limit && 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru contains(c = text.char32At(offset))) { 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offset += UTF_CHAR_LENGTH(c); 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MATCH; 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (offset > limit && 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru contains(c = text.char32At(offset))) { 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Backup offset by 1, unless the preceding character is a 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // surrogate pair -- then backup by 2 (keep offset pointing at 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the lead surrogate). 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --offset; 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (offset >= 0) { 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offset -= UTF_CHAR_LENGTH(text.char32At(offset)) - 1; 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MATCH; 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (incremental && offset == limit) { 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_PARTIAL_MATCH; 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MISMATCH; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof 69