1/*
2*******************************************************************************
3* Copyright (C) 2009-2015, International Business Machines Corporation and
4* others. All Rights Reserved.
5*******************************************************************************
6*
7* File PLURFMT.CPP
8*******************************************************************************
9*/
10
11#include "unicode/decimfmt.h"
12#include "unicode/messagepattern.h"
13#include "unicode/plurfmt.h"
14#include "unicode/plurrule.h"
15#include "unicode/utypes.h"
16#include "cmemory.h"
17#include "messageimpl.h"
18#include "nfrule.h"
19#include "plurrule_impl.h"
20#include "uassert.h"
21#include "uhash.h"
22#include "precision.h"
23#include "visibledigits.h"
24
25#if !UCONFIG_NO_FORMATTING
26
27U_NAMESPACE_BEGIN
28
29static const UChar OTHER_STRING[] = {
30    0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
31};
32
33UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
34
35PluralFormat::PluralFormat(UErrorCode& status)
36        : locale(Locale::getDefault()),
37          msgPattern(status),
38          numberFormat(NULL),
39          offset(0) {
40    init(NULL, UPLURAL_TYPE_CARDINAL, status);
41}
42
43PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
44        : locale(loc),
45          msgPattern(status),
46          numberFormat(NULL),
47          offset(0) {
48    init(NULL, UPLURAL_TYPE_CARDINAL, status);
49}
50
51PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
52        : locale(Locale::getDefault()),
53          msgPattern(status),
54          numberFormat(NULL),
55          offset(0) {
56    init(&rules, UPLURAL_TYPE_COUNT, status);
57}
58
59PluralFormat::PluralFormat(const Locale& loc,
60                           const PluralRules& rules,
61                           UErrorCode& status)
62        : locale(loc),
63          msgPattern(status),
64          numberFormat(NULL),
65          offset(0) {
66    init(&rules, UPLURAL_TYPE_COUNT, status);
67}
68
69PluralFormat::PluralFormat(const Locale& loc,
70                           UPluralType type,
71                           UErrorCode& status)
72        : locale(loc),
73          msgPattern(status),
74          numberFormat(NULL),
75          offset(0) {
76    init(NULL, type, status);
77}
78
79PluralFormat::PluralFormat(const UnicodeString& pat,
80                           UErrorCode& status)
81        : locale(Locale::getDefault()),
82          msgPattern(status),
83          numberFormat(NULL),
84          offset(0) {
85    init(NULL, UPLURAL_TYPE_CARDINAL, status);
86    applyPattern(pat, status);
87}
88
89PluralFormat::PluralFormat(const Locale& loc,
90                           const UnicodeString& pat,
91                           UErrorCode& status)
92        : locale(loc),
93          msgPattern(status),
94          numberFormat(NULL),
95          offset(0) {
96    init(NULL, UPLURAL_TYPE_CARDINAL, status);
97    applyPattern(pat, status);
98}
99
100PluralFormat::PluralFormat(const PluralRules& rules,
101                           const UnicodeString& pat,
102                           UErrorCode& status)
103        : locale(Locale::getDefault()),
104          msgPattern(status),
105          numberFormat(NULL),
106          offset(0) {
107    init(&rules, UPLURAL_TYPE_COUNT, status);
108    applyPattern(pat, status);
109}
110
111PluralFormat::PluralFormat(const Locale& loc,
112                           const PluralRules& rules,
113                           const UnicodeString& pat,
114                           UErrorCode& status)
115        : locale(loc),
116          msgPattern(status),
117          numberFormat(NULL),
118          offset(0) {
119    init(&rules, UPLURAL_TYPE_COUNT, status);
120    applyPattern(pat, status);
121}
122
123PluralFormat::PluralFormat(const Locale& loc,
124                           UPluralType type,
125                           const UnicodeString& pat,
126                           UErrorCode& status)
127        : locale(loc),
128          msgPattern(status),
129          numberFormat(NULL),
130          offset(0) {
131    init(NULL, type, status);
132    applyPattern(pat, status);
133}
134
135PluralFormat::PluralFormat(const PluralFormat& other)
136        : Format(other),
137          locale(other.locale),
138          msgPattern(other.msgPattern),
139          numberFormat(NULL),
140          offset(other.offset) {
141    copyObjects(other);
142}
143
144void
145PluralFormat::copyObjects(const PluralFormat& other) {
146    UErrorCode status = U_ZERO_ERROR;
147    if (numberFormat != NULL) {
148        delete numberFormat;
149    }
150    if (pluralRulesWrapper.pluralRules != NULL) {
151        delete pluralRulesWrapper.pluralRules;
152    }
153
154    if (other.numberFormat == NULL) {
155        numberFormat = NumberFormat::createInstance(locale, status);
156    } else {
157        numberFormat = (NumberFormat*)other.numberFormat->clone();
158    }
159    if (other.pluralRulesWrapper.pluralRules == NULL) {
160        pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
161    } else {
162        pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
163    }
164}
165
166
167PluralFormat::~PluralFormat() {
168    delete numberFormat;
169}
170
171void
172PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
173    if (U_FAILURE(status)) {
174        return;
175    }
176
177    if (rules==NULL) {
178        pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
179    } else {
180        pluralRulesWrapper.pluralRules = rules->clone();
181        if (pluralRulesWrapper.pluralRules == NULL) {
182            status = U_MEMORY_ALLOCATION_ERROR;
183            return;
184        }
185    }
186
187    numberFormat= NumberFormat::createInstance(locale, status);
188}
189
190void
191PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
192    msgPattern.parsePluralStyle(newPattern, NULL, status);
193    if (U_FAILURE(status)) {
194        msgPattern.clear();
195        offset = 0;
196        return;
197    }
198    offset = msgPattern.getPluralOffset(0);
199}
200
201UnicodeString&
202PluralFormat::format(const Formattable& obj,
203                   UnicodeString& appendTo,
204                   FieldPosition& pos,
205                   UErrorCode& status) const
206{
207    if (U_FAILURE(status)) return appendTo;
208
209    if (obj.isNumeric()) {
210        return format(obj, obj.getDouble(), appendTo, pos, status);
211    } else {
212        status = U_ILLEGAL_ARGUMENT_ERROR;
213        return appendTo;
214    }
215}
216
217UnicodeString
218PluralFormat::format(int32_t number, UErrorCode& status) const {
219    FieldPosition fpos(0);
220    UnicodeString result;
221    return format(Formattable(number), number, result, fpos, status);
222}
223
224UnicodeString
225PluralFormat::format(double number, UErrorCode& status) const {
226    FieldPosition fpos(0);
227    UnicodeString result;
228    return format(Formattable(number), number, result, fpos, status);
229}
230
231
232UnicodeString&
233PluralFormat::format(int32_t number,
234                     UnicodeString& appendTo,
235                     FieldPosition& pos,
236                     UErrorCode& status) const {
237    return format(Formattable(number), (double)number, appendTo, pos, status);
238}
239
240UnicodeString&
241PluralFormat::format(double number,
242                     UnicodeString& appendTo,
243                     FieldPosition& pos,
244                     UErrorCode& status) const {
245    return format(Formattable(number), (double)number, appendTo, pos, status);
246}
247
248UnicodeString&
249PluralFormat::format(const Formattable& numberObject, double number,
250                     UnicodeString& appendTo,
251                     FieldPosition& pos,
252                     UErrorCode& status) const {
253    if (U_FAILURE(status)) {
254        return appendTo;
255    }
256    if (msgPattern.countParts() == 0) {
257        return numberFormat->format(numberObject, appendTo, pos, status);
258    }
259    // Get the appropriate sub-message.
260    // Select it based on the formatted number-offset.
261    double numberMinusOffset = number - offset;
262    UnicodeString numberString;
263    FieldPosition ignorePos;
264    FixedPrecision fp;
265    VisibleDigitsWithExponent dec;
266    fp.initVisibleDigitsWithExponent(numberMinusOffset, dec, status);
267    if (U_FAILURE(status)) {
268        return appendTo;
269    }
270    if (offset == 0) {
271        DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
272        if(decFmt != NULL) {
273            decFmt->initVisibleDigitsWithExponent(
274                    numberObject, dec, status);
275            if (U_FAILURE(status)) {
276                return appendTo;
277            }
278            decFmt->format(dec, numberString, ignorePos, status);
279        } else {
280            numberFormat->format(
281                    numberObject, numberString, ignorePos, status);  // could be BigDecimal etc.
282        }
283    } else {
284        DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
285        if(decFmt != NULL) {
286            decFmt->initVisibleDigitsWithExponent(
287                    numberMinusOffset, dec, status);
288            if (U_FAILURE(status)) {
289                return appendTo;
290            }
291            decFmt->format(dec, numberString, ignorePos, status);
292        } else {
293            numberFormat->format(
294                    numberMinusOffset, numberString, ignorePos, status);
295        }
296    }
297    int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status);
298    if (U_FAILURE(status)) { return appendTo; }
299    // Replace syntactic # signs in the top level of this sub-message
300    // (not in nested arguments) with the formatted number-offset.
301    const UnicodeString& pattern = msgPattern.getPatternString();
302    int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
303    for (;;) {
304        const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
305        const UMessagePatternPartType type = part.getType();
306        int32_t index = part.getIndex();
307        if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
308            return appendTo.append(pattern, prevIndex, index - prevIndex);
309        } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
310            (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
311            appendTo.append(pattern, prevIndex, index - prevIndex);
312            if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
313                appendTo.append(numberString);
314            }
315            prevIndex = part.getLimit();
316        } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
317            appendTo.append(pattern, prevIndex, index - prevIndex);
318            prevIndex = index;
319            partIndex = msgPattern.getLimitPartIndex(partIndex);
320            index = msgPattern.getPart(partIndex).getLimit();
321            MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
322            prevIndex = index;
323        }
324    }
325}
326
327UnicodeString&
328PluralFormat::toPattern(UnicodeString& appendTo) {
329    if (0 == msgPattern.countParts()) {
330        appendTo.setToBogus();
331    } else {
332        appendTo.append(msgPattern.getPatternString());
333    }
334    return appendTo;
335}
336
337void
338PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
339    if (U_FAILURE(status)) {
340        return;
341    }
342    locale = loc;
343    msgPattern.clear();
344    delete numberFormat;
345    offset = 0;
346    numberFormat = NULL;
347    pluralRulesWrapper.reset();
348    init(NULL, UPLURAL_TYPE_CARDINAL, status);
349}
350
351void
352PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
353    if (U_FAILURE(status)) {
354        return;
355    }
356    NumberFormat* nf = (NumberFormat*)format->clone();
357    if (nf != NULL) {
358        delete numberFormat;
359        numberFormat = nf;
360    } else {
361        status = U_MEMORY_ALLOCATION_ERROR;
362    }
363}
364
365Format*
366PluralFormat::clone() const
367{
368    return new PluralFormat(*this);
369}
370
371
372PluralFormat&
373PluralFormat::operator=(const PluralFormat& other) {
374    if (this != &other) {
375        locale = other.locale;
376        msgPattern = other.msgPattern;
377        offset = other.offset;
378        copyObjects(other);
379    }
380
381    return *this;
382}
383
384UBool
385PluralFormat::operator==(const Format& other) const {
386    if (this == &other) {
387        return TRUE;
388    }
389    if (!Format::operator==(other)) {
390        return FALSE;
391    }
392    const PluralFormat& o = (const PluralFormat&)other;
393    return
394        locale == o.locale &&
395        msgPattern == o.msgPattern &&  // implies same offset
396        (numberFormat == NULL) == (o.numberFormat == NULL) &&
397        (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
398        (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
399        (pluralRulesWrapper.pluralRules == NULL ||
400            *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
401}
402
403UBool
404PluralFormat::operator!=(const Format& other) const {
405    return  !operator==(other);
406}
407
408void
409PluralFormat::parseObject(const UnicodeString& /*source*/,
410                        Formattable& /*result*/,
411                        ParsePosition& pos) const
412{
413    // Parsing not supported.
414    pos.setErrorIndex(pos.getIndex());
415}
416
417int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
418                                     const PluralSelector& selector, void *context,
419                                     double number, UErrorCode& ec) {
420    if (U_FAILURE(ec)) {
421        return 0;
422    }
423    int32_t count=pattern.countParts();
424    double offset;
425    const MessagePattern::Part* part=&pattern.getPart(partIndex);
426    if (MessagePattern::Part::hasNumericValue(part->getType())) {
427        offset=pattern.getNumericValue(*part);
428        ++partIndex;
429    } else {
430        offset=0;
431    }
432    // The keyword is empty until we need to match against a non-explicit, not-"other" value.
433    // Then we get the keyword from the selector.
434    // (In other words, we never call the selector if we match against an explicit value,
435    // or if the only non-explicit keyword is "other".)
436    UnicodeString keyword;
437    UnicodeString other(FALSE, OTHER_STRING, 5);
438    // When we find a match, we set msgStart>0 and also set this boolean to true
439    // to avoid matching the keyword again (duplicates are allowed)
440    // while we continue to look for an explicit-value match.
441    UBool haveKeywordMatch=FALSE;
442    // msgStart is 0 until we find any appropriate sub-message.
443    // We remember the first "other" sub-message if we have not seen any
444    // appropriate sub-message before.
445    // We remember the first matching-keyword sub-message if we have not seen
446    // one of those before.
447    // (The parser allows [does not check for] duplicate keywords.
448    // We just have to make sure to take the first one.)
449    // We avoid matching the keyword twice by also setting haveKeywordMatch=true
450    // at the first keyword match.
451    // We keep going until we find an explicit-value match or reach the end of the plural style.
452    int32_t msgStart=0;
453    // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
454    // until ARG_LIMIT or end of plural-only pattern.
455    do {
456        part=&pattern.getPart(partIndex++);
457        const UMessagePatternPartType type = part->getType();
458        if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
459            break;
460        }
461        U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
462        // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
463        if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
464            // explicit value like "=2"
465            part=&pattern.getPart(partIndex++);
466            if(number==pattern.getNumericValue(*part)) {
467                // matches explicit value
468                return partIndex;
469            }
470        } else if(!haveKeywordMatch) {
471            // plural keyword like "few" or "other"
472            // Compare "other" first and call the selector if this is not "other".
473            if(pattern.partSubstringMatches(*part, other)) {
474                if(msgStart==0) {
475                    msgStart=partIndex;
476                    if(0 == keyword.compare(other)) {
477                        // This is the first "other" sub-message,
478                        // and the selected keyword is also "other".
479                        // Do not match "other" again.
480                        haveKeywordMatch=TRUE;
481                    }
482                }
483            } else {
484                if(keyword.isEmpty()) {
485                    keyword=selector.select(context, number-offset, ec);
486                    if(msgStart!=0 && (0 == keyword.compare(other))) {
487                        // We have already seen an "other" sub-message.
488                        // Do not match "other" again.
489                        haveKeywordMatch=TRUE;
490                        // Skip keyword matching but do getLimitPartIndex().
491                    }
492                }
493                if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
494                    // keyword matches
495                    msgStart=partIndex;
496                    // Do not match this keyword again.
497                    haveKeywordMatch=TRUE;
498                }
499            }
500        }
501        partIndex=pattern.getLimitPartIndex(partIndex);
502    } while(++partIndex<count);
503    return msgStart;
504}
505
506void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
507    // If no pattern was applied, return null.
508    if (msgPattern.countParts() == 0) {
509        pos.setBeginIndex(-1);
510        pos.setEndIndex(-1);
511        return;
512    }
513    int partIndex = 0;
514    int currMatchIndex;
515    int count=msgPattern.countParts();
516    int startingAt = pos.getBeginIndex();
517    if (startingAt < 0) {
518        startingAt = 0;
519    }
520
521    // The keyword is null until we need to match against a non-explicit, not-"other" value.
522    // Then we get the keyword from the selector.
523    // (In other words, we never call the selector if we match against an explicit value,
524    // or if the only non-explicit keyword is "other".)
525    UnicodeString keyword;
526    UnicodeString matchedWord;
527    const UnicodeString& pattern = msgPattern.getPatternString();
528    int matchedIndex = -1;
529    // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
530    // until the end of the plural-only pattern.
531    while (partIndex < count) {
532        const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
533        if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
534            // Bad format
535            continue;
536        }
537
538        const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
539        if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
540            // Bad format
541            continue;
542        }
543
544        const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
545        if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
546            // Bad format
547            continue;
548        }
549
550        UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
551        if (rbnfLenientScanner != NULL) {
552            // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
553            int32_t length = -1;
554            currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
555        }
556        else {
557            currMatchIndex = source.indexOf(currArg, startingAt);
558        }
559        if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
560            matchedIndex = currMatchIndex;
561            matchedWord = currArg;
562            keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
563        }
564    }
565    if (matchedIndex >= 0) {
566        pos.setBeginIndex(matchedIndex);
567        pos.setEndIndex(matchedIndex + matchedWord.length());
568        result.setString(keyword);
569        return;
570    }
571
572    // Not found!
573    pos.setBeginIndex(-1);
574    pos.setEndIndex(-1);
575}
576
577PluralFormat::PluralSelector::~PluralSelector() {}
578
579PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
580    delete pluralRules;
581}
582
583UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
584                                                          UErrorCode& /*ec*/) const {
585    (void)number;  // unused except in the assertion
586    VisibleDigitsWithExponent *dec=static_cast<VisibleDigitsWithExponent *>(context);
587    return pluralRules->select(*dec);
588}
589
590void PluralFormat::PluralSelectorAdapter::reset() {
591    delete pluralRules;
592    pluralRules = NULL;
593}
594
595
596U_NAMESPACE_END
597
598
599#endif /* #if !UCONFIG_NO_FORMATTING */
600
601//eof
602