164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius* Copyright (c) 2001-2012, International Business Machines 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 07/18/01 aliu Creation. 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unifilt.h" 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/rep.h" 15103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter) 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Define this here due to the lack of another file. 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru It can't be defined in the header */ 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeMatcher::~UnicodeMatcher() {} 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeFilter::~UnicodeFilter() {} 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 2854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * UnicodeFunctor API. 2954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * Note that UnicodeMatcher is a base class of UnicodeFilter. 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeMatcher* UnicodeFilter::toMatcher() const { 3254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius return const_cast<UnicodeFilter *>(this); 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid UnicodeFilter::setData(const TransliterationRuleData*) {} 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Default implementation of UnicodeMatcher::matches() for Unicode 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * filters. Matches a single code point at offset (either one or 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * two 16-bit code units). 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUMatchDegree UnicodeFilter::matches(const Replaceable& text, 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t& offset, 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool incremental) { 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (offset < limit && 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru contains(c = text.char32At(offset))) { 49103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius offset += U16_LENGTH(c); 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MATCH; 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (offset > limit && 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru contains(c = text.char32At(offset))) { 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Backup offset by 1, unless the preceding character is a 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // surrogate pair -- then backup by 2 (keep offset pointing at 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the lead surrogate). 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --offset; 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (offset >= 0) { 59103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius offset -= U16_LENGTH(text.char32At(offset)) - 1; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MATCH; 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (incremental && offset == limit) { 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_PARTIAL_MATCH; 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MISMATCH; 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof 72