1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 1997-2013, International Business Machines Corporation and    *
6* others. All Rights Reserved.                                                *
7*******************************************************************************
8*
9* File CHOICFMT.CPP
10*
11* Modification History:
12*
13*   Date        Name        Description
14*   02/19/97    aliu        Converted from java.
15*   03/20/97    helena      Finished first cut of implementation and got rid
16*                           of nextDouble/previousDouble and replaced with
17*                           boolean array.
18*   4/10/97     aliu        Clean up.  Modified to work on AIX.
19*   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include
20*                           wchar.h.
21*   07/09/97    helena      Made ParsePosition into a class.
22*   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
23*   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
24*   02/22/99    stephen     Removed character literals for EBCDIC safety
25********************************************************************************
26*/
27
28#include "unicode/utypes.h"
29
30#if !UCONFIG_NO_FORMATTING
31
32#include "unicode/choicfmt.h"
33#include "unicode/numfmt.h"
34#include "unicode/locid.h"
35#include "cpputils.h"
36#include "cstring.h"
37#include "messageimpl.h"
38#include "putilimp.h"
39#include "uassert.h"
40#include <stdio.h>
41#include <float.h>
42
43// *****************************************************************************
44// class ChoiceFormat
45// *****************************************************************************
46
47U_NAMESPACE_BEGIN
48
49UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
50
51// Special characters used by ChoiceFormat.  There are two characters
52// used interchangeably to indicate <=.  Either is parsed, but only
53// LESS_EQUAL is generated by toPattern().
54#define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
55#define LESS_THAN    ((UChar)0x003C)   /*<*/
56#define LESS_EQUAL   ((UChar)0x0023)   /*#*/
57#define LESS_EQUAL2  ((UChar)0x2264)
58#define VERTICAL_BAR ((UChar)0x007C)   /*|*/
59#define MINUS        ((UChar)0x002D)   /*-*/
60
61static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/
62static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/
63
64#ifdef INFINITY
65#undef INFINITY
66#endif
67#define INFINITY     ((UChar)0x221E)
68
69//static const UChar gPositiveInfinity[] = {INFINITY, 0};
70//static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
71#define POSITIVE_INF_STRLEN 1
72#define NEGATIVE_INF_STRLEN 2
73
74// -------------------------------------
75// Creates a ChoiceFormat instance based on the pattern.
76
77ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
78                           UErrorCode& status)
79: constructorErrorCode(status),
80  msgPattern(status)
81{
82    applyPattern(newPattern, status);
83}
84
85// -------------------------------------
86// Creates a ChoiceFormat instance with the limit array and
87// format strings for each limit.
88
89ChoiceFormat::ChoiceFormat(const double* limits,
90                           const UnicodeString* formats,
91                           int32_t cnt )
92: constructorErrorCode(U_ZERO_ERROR),
93  msgPattern(constructorErrorCode)
94{
95    setChoices(limits, NULL, formats, cnt, constructorErrorCode);
96}
97
98// -------------------------------------
99
100ChoiceFormat::ChoiceFormat(const double* limits,
101                           const UBool* closures,
102                           const UnicodeString* formats,
103                           int32_t cnt )
104: constructorErrorCode(U_ZERO_ERROR),
105  msgPattern(constructorErrorCode)
106{
107    setChoices(limits, closures, formats, cnt, constructorErrorCode);
108}
109
110// -------------------------------------
111// copy constructor
112
113ChoiceFormat::ChoiceFormat(const    ChoiceFormat&   that)
114: NumberFormat(that),
115  constructorErrorCode(that.constructorErrorCode),
116  msgPattern(that.msgPattern)
117{
118}
119
120// -------------------------------------
121// Private constructor that creates a
122// ChoiceFormat instance based on the
123// pattern and populates UParseError
124
125ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
126                           UParseError& parseError,
127                           UErrorCode& status)
128: constructorErrorCode(status),
129  msgPattern(status)
130{
131    applyPattern(newPattern,parseError, status);
132}
133// -------------------------------------
134
135UBool
136ChoiceFormat::operator==(const Format& that) const
137{
138    if (this == &that) return TRUE;
139    if (!NumberFormat::operator==(that)) return FALSE;
140    ChoiceFormat& thatAlias = (ChoiceFormat&)that;
141    return msgPattern == thatAlias.msgPattern;
142}
143
144// -------------------------------------
145// copy constructor
146
147const ChoiceFormat&
148ChoiceFormat::operator=(const   ChoiceFormat& that)
149{
150    if (this != &that) {
151        NumberFormat::operator=(that);
152        constructorErrorCode = that.constructorErrorCode;
153        msgPattern = that.msgPattern;
154    }
155    return *this;
156}
157
158// -------------------------------------
159
160ChoiceFormat::~ChoiceFormat()
161{
162}
163
164// -------------------------------------
165
166/**
167 * Convert a double value to a string without the overhead of NumberFormat.
168 */
169UnicodeString&
170ChoiceFormat::dtos(double value,
171                   UnicodeString& string)
172{
173    /* Buffer to contain the digits and any extra formatting stuff. */
174    char temp[DBL_DIG + 16];
175    char *itrPtr = temp;
176    char *expPtr;
177
178    sprintf(temp, "%.*g", DBL_DIG, value);
179
180    /* Find and convert the decimal point.
181       Using setlocale on some machines will cause sprintf to use a comma for certain locales.
182    */
183    while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
184        itrPtr++;
185    }
186    if (*itrPtr != 0 && *itrPtr != 'e') {
187        /* We reached something that looks like a decimal point.
188        In case someone used setlocale(), which changes the decimal point. */
189        *itrPtr = '.';
190        itrPtr++;
191    }
192    /* Search for the exponent */
193    while (*itrPtr && *itrPtr != 'e') {
194        itrPtr++;
195    }
196    if (*itrPtr == 'e') {
197        itrPtr++;
198        /* Verify the exponent sign */
199        if (*itrPtr == '+' || *itrPtr == '-') {
200            itrPtr++;
201        }
202        /* Remove leading zeros. You will see this on Windows machines. */
203        expPtr = itrPtr;
204        while (*itrPtr == '0') {
205            itrPtr++;
206        }
207        if (*itrPtr && expPtr != itrPtr) {
208            /* Shift the exponent without zeros. */
209            while (*itrPtr) {
210                *(expPtr++)  = *(itrPtr++);
211            }
212            // NULL terminate
213            *expPtr = 0;
214        }
215    }
216
217    string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
218    return string;
219}
220
221// -------------------------------------
222// calls the overloaded applyPattern method.
223
224void
225ChoiceFormat::applyPattern(const UnicodeString& pattern,
226                           UErrorCode& status)
227{
228    msgPattern.parseChoiceStyle(pattern, NULL, status);
229    constructorErrorCode = status;
230}
231
232// -------------------------------------
233// Applies the pattern to this ChoiceFormat instance.
234
235void
236ChoiceFormat::applyPattern(const UnicodeString& pattern,
237                           UParseError& parseError,
238                           UErrorCode& status)
239{
240    msgPattern.parseChoiceStyle(pattern, &parseError, status);
241    constructorErrorCode = status;
242}
243// -------------------------------------
244// Returns the input pattern string.
245
246UnicodeString&
247ChoiceFormat::toPattern(UnicodeString& result) const
248{
249    return result = msgPattern.getPatternString();
250}
251
252// -------------------------------------
253// Sets the limit and format arrays.
254void
255ChoiceFormat::setChoices(  const double* limits,
256                           const UnicodeString* formats,
257                           int32_t cnt )
258{
259    UErrorCode errorCode = U_ZERO_ERROR;
260    setChoices(limits, NULL, formats, cnt, errorCode);
261}
262
263// -------------------------------------
264// Sets the limit and format arrays.
265void
266ChoiceFormat::setChoices(  const double* limits,
267                           const UBool* closures,
268                           const UnicodeString* formats,
269                           int32_t cnt )
270{
271    UErrorCode errorCode = U_ZERO_ERROR;
272    setChoices(limits, closures, formats, cnt, errorCode);
273}
274
275void
276ChoiceFormat::setChoices(const double* limits,
277                         const UBool* closures,
278                         const UnicodeString* formats,
279                         int32_t count,
280                         UErrorCode &errorCode) {
281    if (U_FAILURE(errorCode)) {
282        return;
283    }
284    if (limits == NULL || formats == NULL) {
285        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286        return;
287    }
288    // Reconstruct the original input pattern.
289    // Modified version of the pre-ICU 4.8 toPattern() implementation.
290    UnicodeString result;
291    for (int32_t i = 0; i < count; ++i) {
292        if (i != 0) {
293            result += VERTICAL_BAR;
294        }
295        UnicodeString buf;
296        if (uprv_isPositiveInfinity(limits[i])) {
297            result += INFINITY;
298        } else if (uprv_isNegativeInfinity(limits[i])) {
299            result += MINUS;
300            result += INFINITY;
301        } else {
302            result += dtos(limits[i], buf);
303        }
304        if (closures != NULL && closures[i]) {
305            result += LESS_THAN;
306        } else {
307            result += LESS_EQUAL;
308        }
309        // Append formats[i], using quotes if there are special
310        // characters.  Single quotes themselves must be escaped in
311        // either case.
312        const UnicodeString& text = formats[i];
313        int32_t textLength = text.length();
314        int32_t nestingLevel = 0;
315        for (int32_t j = 0; j < textLength; ++j) {
316            UChar c = text[j];
317            if (c == SINGLE_QUOTE && nestingLevel == 0) {
318                // Double each top-level apostrophe.
319                result.append(c);
320            } else if (c == VERTICAL_BAR && nestingLevel == 0) {
321                // Surround each pipe symbol with apostrophes for quoting.
322                // If the next character is an apostrophe, then that will be doubled,
323                // and although the parser will see the apostrophe pairs beginning
324                // and ending one character earlier than our doubling, the result
325                // is as desired.
326                //   | -> '|'
327                //   |' -> '|'''
328                //   |'' -> '|''''' etc.
329                result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
330                continue;  // Skip the append(c) at the end of the loop body.
331            } else if (c == LEFT_CURLY_BRACE) {
332                ++nestingLevel;
333            } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
334                --nestingLevel;
335            }
336            result.append(c);
337        }
338    }
339    // Apply the reconstructed pattern.
340    applyPattern(result, errorCode);
341}
342
343// -------------------------------------
344// Gets the limit array.
345
346const double*
347ChoiceFormat::getLimits(int32_t& cnt) const
348{
349    cnt = 0;
350    return NULL;
351}
352
353// -------------------------------------
354// Gets the closures array.
355
356const UBool*
357ChoiceFormat::getClosures(int32_t& cnt) const
358{
359    cnt = 0;
360    return NULL;
361}
362
363// -------------------------------------
364// Gets the format array.
365
366const UnicodeString*
367ChoiceFormat::getFormats(int32_t& cnt) const
368{
369    cnt = 0;
370    return NULL;
371}
372
373// -------------------------------------
374// Formats an int64 number, it's actually formatted as
375// a double.  The returned format string may differ
376// from the input number because of this.
377
378UnicodeString&
379ChoiceFormat::format(int64_t number,
380                     UnicodeString& appendTo,
381                     FieldPosition& status) const
382{
383    return format((double) number, appendTo, status);
384}
385
386// -------------------------------------
387// Formats an int32_t number, it's actually formatted as
388// a double.
389
390UnicodeString&
391ChoiceFormat::format(int32_t number,
392                     UnicodeString& appendTo,
393                     FieldPosition& status) const
394{
395    return format((double) number, appendTo, status);
396}
397
398// -------------------------------------
399// Formats a double number.
400
401UnicodeString&
402ChoiceFormat::format(double number,
403                     UnicodeString& appendTo,
404                     FieldPosition& /*pos*/) const
405{
406    if (msgPattern.countParts() == 0) {
407        // No pattern was applied, or it failed.
408        return appendTo;
409    }
410    // Get the appropriate sub-message.
411    int32_t msgStart = findSubMessage(msgPattern, 0, number);
412    if (!MessageImpl::jdkAposMode(msgPattern)) {
413        int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
414        int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
415        appendTo.append(msgPattern.getPatternString(),
416                        patternStart,
417                        msgPattern.getPatternIndex(msgLimit) - patternStart);
418        return appendTo;
419    }
420    // JDK compatibility mode: Remove SKIP_SYNTAX.
421    return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
422}
423
424int32_t
425ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
426    int32_t count = pattern.countParts();
427    int32_t msgStart;
428    // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
429    // until ARG_LIMIT or end of choice-only pattern.
430    // Ignore the first number and selector and start the loop on the first message.
431    partIndex += 2;
432    for (;;) {
433        // Skip but remember the current sub-message.
434        msgStart = partIndex;
435        partIndex = pattern.getLimitPartIndex(partIndex);
436        if (++partIndex >= count) {
437            // Reached the end of the choice-only pattern.
438            // Return with the last sub-message.
439            break;
440        }
441        const MessagePattern::Part &part = pattern.getPart(partIndex++);
442        UMessagePatternPartType type = part.getType();
443        if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
444            // Reached the end of the ChoiceFormat style.
445            // Return with the last sub-message.
446            break;
447        }
448        // part is an ARG_INT or ARG_DOUBLE
449        U_ASSERT(MessagePattern::Part::hasNumericValue(type));
450        double boundary = pattern.getNumericValue(part);
451        // Fetch the ARG_SELECTOR character.
452        int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
453        UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
454        if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
455            // The number is in the interval between the previous boundary and the current one.
456            // Return with the sub-message between them.
457            // The !(a>b) and !(a>=b) comparisons are equivalent to
458            // (a<=b) and (a<b) except they "catch" NaN.
459            break;
460        }
461    }
462    return msgStart;
463}
464
465// -------------------------------------
466// Formats an array of objects. Checks if the data type of the objects
467// to get the right value for formatting.
468
469UnicodeString&
470ChoiceFormat::format(const Formattable* objs,
471                     int32_t cnt,
472                     UnicodeString& appendTo,
473                     FieldPosition& pos,
474                     UErrorCode& status) const
475{
476    if(cnt < 0) {
477        status = U_ILLEGAL_ARGUMENT_ERROR;
478        return appendTo;
479    }
480    if (msgPattern.countParts() == 0) {
481        status = U_INVALID_STATE_ERROR;
482        return appendTo;
483    }
484
485    for (int32_t i = 0; i < cnt; i++) {
486        double objDouble = objs[i].getDouble(status);
487        if (U_SUCCESS(status)) {
488            format(objDouble, appendTo, pos);
489        }
490    }
491
492    return appendTo;
493}
494
495// -------------------------------------
496
497void
498ChoiceFormat::parse(const UnicodeString& text,
499                    Formattable& result,
500                    ParsePosition& pos) const
501{
502    result.setDouble(parseArgument(msgPattern, 0, text, pos));
503}
504
505double
506ChoiceFormat::parseArgument(
507        const MessagePattern &pattern, int32_t partIndex,
508        const UnicodeString &source, ParsePosition &pos) {
509    // find the best number (defined as the one with the longest parse)
510    int32_t start = pos.getIndex();
511    int32_t furthest = start;
512    double bestNumber = uprv_getNaN();
513    double tempNumber = 0.0;
514    int32_t count = pattern.countParts();
515    while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
516        tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
517        partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
518        int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
519        int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
520        if (len >= 0) {
521            int32_t newIndex = start + len;
522            if (newIndex > furthest) {
523                furthest = newIndex;
524                bestNumber = tempNumber;
525                if (furthest == source.length()) {
526                    break;
527                }
528            }
529        }
530        partIndex = msgLimit + 1;
531    }
532    if (furthest == start) {
533        pos.setErrorIndex(start);
534    } else {
535        pos.setIndex(furthest);
536    }
537    return bestNumber;
538}
539
540int32_t
541ChoiceFormat::matchStringUntilLimitPart(
542        const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
543        const UnicodeString &source, int32_t sourceOffset) {
544    int32_t matchingSourceLength = 0;
545    const UnicodeString &msgString = pattern.getPatternString();
546    int32_t prevIndex = pattern.getPart(partIndex).getLimit();
547    for (;;) {
548        const MessagePattern::Part &part = pattern.getPart(++partIndex);
549        if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
550            int32_t index = part.getIndex();
551            int32_t length = index - prevIndex;
552            if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
553                return -1;  // mismatch
554            }
555            matchingSourceLength += length;
556            if (partIndex == limitPartIndex) {
557                return matchingSourceLength;
558            }
559            prevIndex = part.getLimit();  // SKIP_SYNTAX
560        }
561    }
562}
563
564// -------------------------------------
565
566Format*
567ChoiceFormat::clone() const
568{
569    ChoiceFormat *aCopy = new ChoiceFormat(*this);
570    return aCopy;
571}
572
573U_NAMESPACE_END
574
575#endif /* #if !UCONFIG_NO_FORMATTING */
576
577//eof
578