1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius* Copyright (c) 2001-2012, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 07/18/01 aliu Creation. 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unifilt.h" 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/rep.h" 13103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter) 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Define this here due to the lack of another file. 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru It can't be defined in the header */ 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeMatcher::~UnicodeMatcher() {} 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeFilter::~UnicodeFilter() {} 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 2654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * UnicodeFunctor API. 2754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * Note that UnicodeMatcher is a base class of UnicodeFilter. 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeMatcher* UnicodeFilter::toMatcher() const { 3054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius return const_cast<UnicodeFilter *>(this); 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid UnicodeFilter::setData(const TransliterationRuleData*) {} 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Default implementation of UnicodeMatcher::matches() for Unicode 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * filters. Matches a single code point at offset (either one or 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * two 16-bit code units). 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUMatchDegree UnicodeFilter::matches(const Replaceable& text, 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t& offset, 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool incremental) { 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (offset < limit && 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru contains(c = text.char32At(offset))) { 47103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius offset += U16_LENGTH(c); 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MATCH; 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (offset > limit && 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru contains(c = text.char32At(offset))) { 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Backup offset by 1, unless the preceding character is a 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // surrogate pair -- then backup by 2 (keep offset pointing at 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the lead surrogate). 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --offset; 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (offset >= 0) { 57103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius offset -= U16_LENGTH(text.char32At(offset)) - 1; 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MATCH; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (incremental && offset == limit) { 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_PARTIAL_MATCH; 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MISMATCH; 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof 70