// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2001-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   07/26/01    aliu        Creation.
**********************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "quant.h"
#include "unicode/unistr.h"
#include "util.h"

U_NAMESPACE_BEGIN

23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(Quantifier) 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruQuantifier::Quantifier(UnicodeFunctor *adoptedMatcher, 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t _minCount, uint32_t _maxCount) { 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // assert(adopted != 0); 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // assert(minCount <= maxCount); 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matcher = adoptedMatcher; 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru this->minCount = _minCount; 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru this->maxCount = _maxCount; 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruQuantifier::Quantifier(const Quantifier& o) : 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFunctor(o), 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeMatcher(o), 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matcher(o.matcher->clone()), 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minCount(o.minCount), 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxCount(o.maxCount) 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruQuantifier::~Quantifier() { 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete matcher; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeFunctor* Quantifier::clone() const { 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new Quantifier(*this); 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and return the pointer. 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeMatcher* Quantifier::toMatcher() const { 5954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius Quantifier *nonconst_this = const_cast<Quantifier *>(this); 6054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius UnicodeMatcher *nonconst_base = static_cast<UnicodeMatcher *>(nonconst_this); 6154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 6254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius return nonconst_base; 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUMatchDegree Quantifier::matches(const Replaceable& text, 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t& offset, 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool incremental) { 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start = offset; 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
uint32_t count = 0; 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (count < maxCount) { 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t pos = offset; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UMatchDegree m = matcher->toMatcher()->matches(text, offset, limit, incremental); 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (m == U_MATCH) { 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++count; 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (pos == offset) { 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If offset has not moved we have a zero-width match. 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Don't keep matching it infinitely. 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (incremental && m == U_PARTIAL_MATCH) { 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_PARTIAL_MATCH; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (incremental && offset == limit) { 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_PARTIAL_MATCH; 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (count >= minCount) { 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MATCH; 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offset = start; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 
U_MISMATCH; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& Quantifier::toPattern(UnicodeString& result, 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool escapeUnprintable) const { 10285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho result.truncate(0); 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matcher->toMatcher()->toPattern(result, escapeUnprintable); 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (minCount == 0) { 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (maxCount == 1) { 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result.append((UChar)63); /*?*/ 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (maxCount == MAX) { 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result.append((UChar)42); /***/ 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // else fall through 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (minCount == 1 && maxCount == MAX) { 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result.append((UChar)43); /*+*/ 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.append((UChar)123); /*{*/ 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendNumber(result, minCount); 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.append((UChar)44); /*,*/ 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru if (maxCount != MAX) { 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendNumber(result, maxCount); 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.append((UChar)125); /*}*/ 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result; 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool Quantifier::matchesIndexValue(uint8_t v) const { 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (minCount == 0) || matcher->toMatcher()->matchesIndexValue(v); 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid Quantifier::addMatchSetTo(UnicodeSet& toUnionTo) const { 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (maxCount > 0) { 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matcher->toMatcher()->addMatchSetTo(toUnionTo); 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor 
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid Quantifier::setData(const TransliterationRuleData* d) { 14485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho matcher->setData(d); 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof 152