1/*
2*******************************************************************************
3* Copyright (C) 2009-2011, International Business Machines Corporation and
4* others. All Rights Reserved.
5*******************************************************************************
6*
7* File PLURFMT.CPP
8*
9* Modification History:
10*
11*   Date        Name        Description
12*******************************************************************************
13*/
14
15#include "unicode/messagepattern.h"
16#include "unicode/plurfmt.h"
17#include "unicode/plurrule.h"
18#include "unicode/utypes.h"
19#include "cmemory.h"
20#include "messageimpl.h"
21#include "plurrule_impl.h"
22#include "uassert.h"
23#include "uhash.h"
24
25#if !UCONFIG_NO_FORMATTING
26
27U_NAMESPACE_BEGIN
28
29static const UChar OTHER_STRING[] = {
30    0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
31};
32
33UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
34
35PluralFormat::PluralFormat(UErrorCode& status)
36        : locale(Locale::getDefault()),
37          msgPattern(status),
38          numberFormat(NULL),
39          offset(0) {
40    init(NULL, status);
41}
42
43PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
44        : locale(loc),
45          msgPattern(status),
46          numberFormat(NULL),
47          offset(0) {
48    init(NULL, status);
49}
50
51PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
52        : locale(Locale::getDefault()),
53          msgPattern(status),
54          numberFormat(NULL),
55          offset(0) {
56    init(&rules, status);
57}
58
59PluralFormat::PluralFormat(const Locale& loc,
60                           const PluralRules& rules,
61                           UErrorCode& status)
62        : locale(loc),
63          msgPattern(status),
64          numberFormat(NULL),
65          offset(0) {
66    init(&rules, status);
67}
68
69PluralFormat::PluralFormat(const UnicodeString& pat,
70                           UErrorCode& status)
71        : locale(Locale::getDefault()),
72          msgPattern(status),
73          numberFormat(NULL),
74          offset(0) {
75    init(NULL, status);
76    applyPattern(pat, status);
77}
78
79PluralFormat::PluralFormat(const Locale& loc,
80                           const UnicodeString& pat,
81                           UErrorCode& status)
82        : locale(loc),
83          msgPattern(status),
84          numberFormat(NULL),
85          offset(0) {
86    init(NULL, status);
87    applyPattern(pat, status);
88}
89
90PluralFormat::PluralFormat(const PluralRules& rules,
91                           const UnicodeString& pat,
92                           UErrorCode& status)
93        : locale(Locale::getDefault()),
94          msgPattern(status),
95          numberFormat(NULL),
96          offset(0) {
97    init(&rules, status);
98    applyPattern(pat, status);
99}
100
101PluralFormat::PluralFormat(const Locale& loc,
102                           const PluralRules& rules,
103                           const UnicodeString& pat,
104                           UErrorCode& status)
105        : locale(loc),
106          msgPattern(status),
107          numberFormat(NULL),
108          offset(0) {
109    init(&rules, status);
110    applyPattern(pat, status);
111}
112
113PluralFormat::PluralFormat(const PluralFormat& other)
114        : Format(other),
115          locale(other.locale),
116          msgPattern(other.msgPattern),
117          numberFormat(NULL),
118          offset(other.offset) {
119    copyObjects(other);
120}
121
122void
123PluralFormat::copyObjects(const PluralFormat& other) {
124    UErrorCode status = U_ZERO_ERROR;
125    if (numberFormat != NULL) {
126        delete numberFormat;
127    }
128    if (pluralRulesWrapper.pluralRules != NULL) {
129        delete pluralRulesWrapper.pluralRules;
130    }
131
132    if (other.numberFormat == NULL) {
133        numberFormat = NumberFormat::createInstance(locale, status);
134    } else {
135        numberFormat = (NumberFormat*)other.numberFormat->clone();
136    }
137    if (other.pluralRulesWrapper.pluralRules == NULL) {
138        pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
139    } else {
140        pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
141    }
142}
143
144
145PluralFormat::~PluralFormat() {
146    delete numberFormat;
147}
148
149void
150PluralFormat::init(const PluralRules* rules, UErrorCode& status) {
151    if (U_FAILURE(status)) {
152        return;
153    }
154
155    if (rules==NULL) {
156        pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
157    } else {
158        pluralRulesWrapper.pluralRules = rules->clone();
159        if (pluralRulesWrapper.pluralRules == NULL) {
160            status = U_MEMORY_ALLOCATION_ERROR;
161            return;
162        }
163    }
164
165    numberFormat= NumberFormat::createInstance(locale, status);
166}
167
168void
169PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
170    msgPattern.parsePluralStyle(newPattern, NULL, status);
171    if (U_FAILURE(status)) {
172        msgPattern.clear();
173        offset = 0;
174        return;
175    }
176    offset = msgPattern.getPluralOffset(0);
177}
178
179UnicodeString&
180PluralFormat::format(const Formattable& obj,
181                   UnicodeString& appendTo,
182                   FieldPosition& pos,
183                   UErrorCode& status) const
184{
185    if (U_FAILURE(status)) return appendTo;
186
187    if (obj.isNumeric()) {
188        return format(obj.getDouble(), appendTo, pos, status);
189    } else {
190        status = U_ILLEGAL_ARGUMENT_ERROR;
191        return appendTo;
192    }
193}
194
195UnicodeString
196PluralFormat::format(int32_t number, UErrorCode& status) const {
197    FieldPosition fpos(0);
198    UnicodeString result;
199    return format(number, result, fpos, status);
200}
201
202UnicodeString
203PluralFormat::format(double number, UErrorCode& status) const {
204    FieldPosition fpos(0);
205    UnicodeString result;
206    return format(number, result, fpos, status);
207}
208
209
210UnicodeString&
211PluralFormat::format(int32_t number,
212                     UnicodeString& appendTo,
213                     FieldPosition& pos,
214                     UErrorCode& status) const {
215    return format((double)number, appendTo, pos, status);
216}
217
218UnicodeString&
219PluralFormat::format(double number,
220                     UnicodeString& appendTo,
221                     FieldPosition& pos,
222                     UErrorCode& status) const {
223    if (U_FAILURE(status)) {
224        return appendTo;
225    }
226    if (msgPattern.countParts() == 0) {
227        return numberFormat->format(number, appendTo, pos);
228    }
229    // Get the appropriate sub-message.
230    int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, number, status);
231    // Replace syntactic # signs in the top level of this sub-message
232    // (not in nested arguments) with the formatted number-offset.
233    const UnicodeString& pattern = msgPattern.getPatternString();
234    number -= offset;
235    int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
236    for (;;) {
237        const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
238        const UMessagePatternPartType type = part.getType();
239        int32_t index = part.getIndex();
240        if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
241            return appendTo.append(pattern, prevIndex, index - prevIndex);
242        } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
243            (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
244            appendTo.append(pattern, prevIndex, index - prevIndex);
245            if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
246                numberFormat->format(number, appendTo);
247            }
248            prevIndex = part.getLimit();
249        } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
250            appendTo.append(pattern, prevIndex, index - prevIndex);
251            prevIndex = index;
252            partIndex = msgPattern.getLimitPartIndex(partIndex);
253            index = msgPattern.getPart(partIndex).getLimit();
254            MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
255            prevIndex = index;
256        }
257    }
258}
259
260UnicodeString&
261PluralFormat::toPattern(UnicodeString& appendTo) {
262    if (0 == msgPattern.countParts()) {
263        appendTo.setToBogus();
264    } else {
265        appendTo.append(msgPattern.getPatternString());
266    }
267    return appendTo;
268}
269
270void
271PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
272    if (U_FAILURE(status)) {
273        return;
274    }
275    locale = loc;
276    msgPattern.clear();
277    delete numberFormat;
278    offset = 0;
279    numberFormat = NULL;
280    pluralRulesWrapper.reset();
281    init(NULL, status);
282}
283
284void
285PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
286    if (U_FAILURE(status)) {
287        return;
288    }
289    NumberFormat* nf = (NumberFormat*)format->clone();
290    if (nf != NULL) {
291        delete numberFormat;
292        numberFormat = nf;
293    } else {
294        status = U_MEMORY_ALLOCATION_ERROR;
295    }
296}
297
298Format*
299PluralFormat::clone() const
300{
301    return new PluralFormat(*this);
302}
303
304
305PluralFormat&
306PluralFormat::operator=(const PluralFormat& other) {
307    if (this != &other) {
308        locale = other.locale;
309        msgPattern = other.msgPattern;
310        offset = other.offset;
311        copyObjects(other);
312    }
313
314    return *this;
315}
316
317UBool
318PluralFormat::operator==(const Format& other) const {
319    if (this == &other) {
320        return TRUE;
321    }
322    if (!Format::operator==(other)) {
323        return FALSE;
324    }
325    const PluralFormat& o = (const PluralFormat&)other;
326    return
327        locale == o.locale &&
328        msgPattern == o.msgPattern &&  // implies same offset
329        (numberFormat == NULL) == (o.numberFormat == NULL) &&
330        (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
331        (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
332        (pluralRulesWrapper.pluralRules == NULL ||
333            *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
334}
335
336UBool
337PluralFormat::operator!=(const Format& other) const {
338    return  !operator==(other);
339}
340
341void
342PluralFormat::parseObject(const UnicodeString& /*source*/,
343                        Formattable& /*result*/,
344                        ParsePosition& pos) const
345{
346    // Parsing not supported.
347    pos.setErrorIndex(pos.getIndex());
348}
349
350int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
351                                     const PluralSelector& selector, double number, UErrorCode& ec) {
352    if (U_FAILURE(ec)) {
353        return 0;
354    }
355    int32_t count=pattern.countParts();
356    double offset;
357    const MessagePattern::Part* part=&pattern.getPart(partIndex);
358    if (MessagePattern::Part::hasNumericValue(part->getType())) {
359        offset=pattern.getNumericValue(*part);
360        ++partIndex;
361    } else {
362        offset=0;
363    }
364    // The keyword is empty until we need to match against non-explicit, not-"other" value.
365    // Then we get the keyword from the selector.
366    // (In other words, we never call the selector if we match against an explicit value,
367    // or if the only non-explicit keyword is "other".)
368    UnicodeString keyword;
369    UnicodeString other(FALSE, OTHER_STRING, 5);
370    // When we find a match, we set msgStart>0 and also set this boolean to true
371    // to avoid matching the keyword again (duplicates are allowed)
372    // while we continue to look for an explicit-value match.
373    UBool haveKeywordMatch=FALSE;
374    // msgStart is 0 until we find any appropriate sub-message.
375    // We remember the first "other" sub-message if we have not seen any
376    // appropriate sub-message before.
377    // We remember the first matching-keyword sub-message if we have not seen
378    // one of those before.
379    // (The parser allows [does not check for] duplicate keywords.
380    // We just have to make sure to take the first one.)
381    // We avoid matching the keyword twice by also setting haveKeywordMatch=true
382    // at the first keyword match.
383    // We keep going until we find an explicit-value match or reach the end of the plural style.
384    int32_t msgStart=0;
385    // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
386    // until ARG_LIMIT or end of plural-only pattern.
387    do {
388        part=&pattern.getPart(partIndex++);
389        const UMessagePatternPartType type = part->getType();
390        if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
391            break;
392        }
393        U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
394        // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
395        if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
396            // explicit value like "=2"
397            part=&pattern.getPart(partIndex++);
398            if(number==pattern.getNumericValue(*part)) {
399                // matches explicit value
400                return partIndex;
401            }
402        } else if(!haveKeywordMatch) {
403            // plural keyword like "few" or "other"
404            // Compare "other" first and call the selector if this is not "other".
405            if(pattern.partSubstringMatches(*part, other)) {
406                if(msgStart==0) {
407                    msgStart=partIndex;
408                    if(0 == keyword.compare(other)) {
409                        // This is the first "other" sub-message,
410                        // and the selected keyword is also "other".
411                        // Do not match "other" again.
412                        haveKeywordMatch=TRUE;
413                    }
414                }
415            } else {
416                if(keyword.isEmpty()) {
417                    keyword=selector.select(number-offset, ec);
418                    if(msgStart!=0 && (0 == keyword.compare(other))) {
419                        // We have already seen an "other" sub-message.
420                        // Do not match "other" again.
421                        haveKeywordMatch=TRUE;
422                        // Skip keyword matching but do getLimitPartIndex().
423                    }
424                }
425                if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
426                    // keyword matches
427                    msgStart=partIndex;
428                    // Do not match this keyword again.
429                    haveKeywordMatch=TRUE;
430                }
431            }
432        }
433        partIndex=pattern.getLimitPartIndex(partIndex);
434    } while(++partIndex<count);
435    return msgStart;
436}
437
438PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
439    delete pluralRules;
440}
441
442UnicodeString PluralFormat::PluralSelectorAdapter::select(double number,
443                                                          UErrorCode& /*ec*/) const {
444    return pluralRules->select(number);
445}
446
447void PluralFormat::PluralSelectorAdapter::reset() {
448    delete pluralRules;
449    pluralRules = NULL;
450}
451
452
453U_NAMESPACE_END
454
455
456#endif /* #if !UCONFIG_NO_FORMATTING */
457
458//eof
459