164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius*   Copyright (C) 2001-2012, International Business Machines
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Date        Name        Description
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   07/26/01    aliu        Creation.
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "quant.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(Quantifier)
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruQuantifier::Quantifier(UnicodeFunctor *adoptedMatcher,
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       uint32_t _minCount, uint32_t _maxCount) {
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // assert(adopted != 0);
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // assert(minCount <= maxCount);
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    matcher = adoptedMatcher;
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    this->minCount = _minCount;
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    this->maxCount = _maxCount;
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruQuantifier::Quantifier(const Quantifier& o) :
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeFunctor(o),
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeMatcher(o),
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    matcher(o.matcher->clone()),
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    minCount(o.minCount),
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    maxCount(o.maxCount)
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruQuantifier::~Quantifier() {
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    delete matcher;
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeFunctor* Quantifier::clone() const {
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new Quantifier(*this);
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeFunctor API.  Cast 'this' to a UnicodeMatcher* pointer
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and return the pointer.
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeMatcher* Quantifier::toMatcher() const {
5954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  Quantifier  *nonconst_this = const_cast<Quantifier *>(this);
6054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  UnicodeMatcher *nonconst_base = static_cast<UnicodeMatcher *>(nonconst_this);
6154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
6254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  return nonconst_base;
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUMatchDegree Quantifier::matches(const Replaceable& text,
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 int32_t& offset,
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 int32_t limit,
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 UBool incremental) {
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t start = offset;
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t count = 0;
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (count < maxCount) {
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t pos = offset;
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UMatchDegree m = matcher->toMatcher()->matches(text, offset, limit, incremental);
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (m == U_MATCH) {
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++count;
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (pos == offset) {
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // If offset has not moved we have a zero-width match.
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // Don't keep matching it infinitely.
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if (incremental && m == U_PARTIAL_MATCH) {
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return U_PARTIAL_MATCH;
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (incremental && offset == limit) {
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_PARTIAL_MATCH;
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (count >= minCount) {
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return U_MATCH;
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    offset = start;
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return U_MISMATCH;
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& Quantifier::toPattern(UnicodeString& result,
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     UBool escapeUnprintable) const {
10285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho	result.truncate(0);
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    matcher->toMatcher()->toPattern(result, escapeUnprintable);
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (minCount == 0) {
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (maxCount == 1) {
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return result.append((UChar)63); /*?*/
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if (maxCount == MAX) {
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return result.append((UChar)42); /***/
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // else fall through
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if (minCount == 1 && maxCount == MAX) {
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return result.append((UChar)43); /*+*/
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    result.append((UChar)123); /*{*/
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ICU_Utility::appendNumber(result, minCount);
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    result.append((UChar)44); /*,*/
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (maxCount != MAX) {
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ICU_Utility::appendNumber(result, maxCount);
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    result.append((UChar)125); /*}*/
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result;
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool Quantifier::matchesIndexValue(uint8_t v) const {
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (minCount == 0) || matcher->toMatcher()->matchesIndexValue(v);
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid Quantifier::addMatchSetTo(UnicodeSet& toUnionTo) const {
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (maxCount > 0) {
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        matcher->toMatcher()->addMatchSetTo(toUnionTo);
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid Quantifier::setData(const TransliterationRuleData* d) {
14485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho		matcher->setData(d);
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof
152