1/*
2*******************************************************************************
3* Copyright (C) 1997-2013, International Business Machines Corporation and    *
4* others. All Rights Reserved.                                                *
5*******************************************************************************
6*
7* File CHOICFMT.CPP
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   02/19/97    aliu        Converted from java.
13*   03/20/97    helena      Finished first cut of implementation and got rid
14*                           of nextDouble/previousDouble and replaced with
15*                           boolean array.
16*   4/10/97     aliu        Clean up.  Modified to work on AIX.
17*   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include
18*                           wchar.h.
19*   07/09/97    helena      Made ParsePosition into a class.
20*   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
21*   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
22*   02/22/99    stephen     Removed character literals for EBCDIC safety
23********************************************************************************
24*/
25
26#include "unicode/utypes.h"
27
28#if !UCONFIG_NO_FORMATTING
29
30#include "unicode/choicfmt.h"
31#include "unicode/numfmt.h"
32#include "unicode/locid.h"
33#include "cpputils.h"
34#include "cstring.h"
35#include "messageimpl.h"
36#include "putilimp.h"
37#include "uassert.h"
38#include <stdio.h>
39#include <float.h>
40
41// *****************************************************************************
42// class ChoiceFormat
43// *****************************************************************************
44
45U_NAMESPACE_BEGIN
46
47UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
48
// Special characters used by ChoiceFormat.  There are two characters
// used interchangeably to indicate <=.  Either is parsed, but only
// LESS_EQUAL is generated when setChoices() rebuilds a pattern string
// (toPattern() returns the stored pattern text unchanged).
#define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
#define LESS_THAN    ((UChar)0x003C)   /*<*/
#define LESS_EQUAL   ((UChar)0x0023)   /*#*/
#define LESS_EQUAL2  ((UChar)0x2264)   /* U+2264 LESS-THAN OR EQUAL TO */
#define VERTICAL_BAR ((UChar)0x007C)   /*|*/
#define MINUS        ((UChar)0x002D)   /*-*/

// Curly braces; setChoices() counts them to track nesting depth
// while quoting the format strings.
static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/
static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/

// A C library header may already define INFINITY; drop that definition so
// the name can stand for the pattern character U+221E INFINITY instead.
#ifdef INFINITY
#undef INFINITY
#endif
#define INFINITY     ((UChar)0x221E)

//static const UChar gPositiveInfinity[] = {INFINITY, 0};
//static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
// Lengths (in UChars) of the two infinity spellings shown in the
// commented-out arrays above.
#define POSITIVE_INF_STRLEN 1
#define NEGATIVE_INF_STRLEN 2
71
72// -------------------------------------
73// Creates a ChoiceFormat instance based on the pattern.
74
75ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
76                           UErrorCode& status)
77: constructorErrorCode(status),
78  msgPattern(status)
79{
80    applyPattern(newPattern, status);
81}
82
83// -------------------------------------
84// Creates a ChoiceFormat instance with the limit array and
85// format strings for each limit.
86
87ChoiceFormat::ChoiceFormat(const double* limits,
88                           const UnicodeString* formats,
89                           int32_t cnt )
90: constructorErrorCode(U_ZERO_ERROR),
91  msgPattern(constructorErrorCode)
92{
93    setChoices(limits, NULL, formats, cnt, constructorErrorCode);
94}
95
96// -------------------------------------
97
98ChoiceFormat::ChoiceFormat(const double* limits,
99                           const UBool* closures,
100                           const UnicodeString* formats,
101                           int32_t cnt )
102: constructorErrorCode(U_ZERO_ERROR),
103  msgPattern(constructorErrorCode)
104{
105    setChoices(limits, closures, formats, cnt, constructorErrorCode);
106}
107
108// -------------------------------------
109// copy constructor
110
111ChoiceFormat::ChoiceFormat(const    ChoiceFormat&   that)
112: NumberFormat(that),
113  constructorErrorCode(that.constructorErrorCode),
114  msgPattern(that.msgPattern)
115{
116}
117
118// -------------------------------------
119// Private constructor that creates a
120// ChoiceFormat instance based on the
121// pattern and populates UParseError
122
123ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
124                           UParseError& parseError,
125                           UErrorCode& status)
126: constructorErrorCode(status),
127  msgPattern(status)
128{
129    applyPattern(newPattern,parseError, status);
130}
131// -------------------------------------
132
133UBool
134ChoiceFormat::operator==(const Format& that) const
135{
136    if (this == &that) return TRUE;
137    if (!NumberFormat::operator==(that)) return FALSE;
138    ChoiceFormat& thatAlias = (ChoiceFormat&)that;
139    return msgPattern == thatAlias.msgPattern;
140}
141
142// -------------------------------------
143// copy constructor
144
145const ChoiceFormat&
146ChoiceFormat::operator=(const   ChoiceFormat& that)
147{
148    if (this != &that) {
149        NumberFormat::operator=(that);
150        constructorErrorCode = that.constructorErrorCode;
151        msgPattern = that.msgPattern;
152    }
153    return *this;
154}
155
156// -------------------------------------
157
158ChoiceFormat::~ChoiceFormat()
159{
160}
161
162// -------------------------------------
163
164/**
165 * Convert a double value to a string without the overhead of NumberFormat.
166 */
167UnicodeString&
168ChoiceFormat::dtos(double value,
169                   UnicodeString& string)
170{
171    /* Buffer to contain the digits and any extra formatting stuff. */
172    char temp[DBL_DIG + 16];
173    char *itrPtr = temp;
174    char *expPtr;
175
176    sprintf(temp, "%.*g", DBL_DIG, value);
177
178    /* Find and convert the decimal point.
179       Using setlocale on some machines will cause sprintf to use a comma for certain locales.
180    */
181    while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
182        itrPtr++;
183    }
184    if (*itrPtr != 0 && *itrPtr != 'e') {
185        /* We reached something that looks like a decimal point.
186        In case someone used setlocale(), which changes the decimal point. */
187        *itrPtr = '.';
188        itrPtr++;
189    }
190    /* Search for the exponent */
191    while (*itrPtr && *itrPtr != 'e') {
192        itrPtr++;
193    }
194    if (*itrPtr == 'e') {
195        itrPtr++;
196        /* Verify the exponent sign */
197        if (*itrPtr == '+' || *itrPtr == '-') {
198            itrPtr++;
199        }
200        /* Remove leading zeros. You will see this on Windows machines. */
201        expPtr = itrPtr;
202        while (*itrPtr == '0') {
203            itrPtr++;
204        }
205        if (*itrPtr && expPtr != itrPtr) {
206            /* Shift the exponent without zeros. */
207            while (*itrPtr) {
208                *(expPtr++)  = *(itrPtr++);
209            }
210            // NULL terminate
211            *expPtr = 0;
212        }
213    }
214
215    string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
216    return string;
217}
218
219// -------------------------------------
220// calls the overloaded applyPattern method.
221
222void
223ChoiceFormat::applyPattern(const UnicodeString& pattern,
224                           UErrorCode& status)
225{
226    msgPattern.parseChoiceStyle(pattern, NULL, status);
227    constructorErrorCode = status;
228}
229
230// -------------------------------------
231// Applies the pattern to this ChoiceFormat instance.
232
233void
234ChoiceFormat::applyPattern(const UnicodeString& pattern,
235                           UParseError& parseError,
236                           UErrorCode& status)
237{
238    msgPattern.parseChoiceStyle(pattern, &parseError, status);
239    constructorErrorCode = status;
240}
241// -------------------------------------
242// Returns the input pattern string.
243
244UnicodeString&
245ChoiceFormat::toPattern(UnicodeString& result) const
246{
247    return result = msgPattern.getPatternString();
248}
249
250// -------------------------------------
251// Sets the limit and format arrays.
252void
253ChoiceFormat::setChoices(  const double* limits,
254                           const UnicodeString* formats,
255                           int32_t cnt )
256{
257    UErrorCode errorCode = U_ZERO_ERROR;
258    setChoices(limits, NULL, formats, cnt, errorCode);
259}
260
261// -------------------------------------
262// Sets the limit and format arrays.
263void
264ChoiceFormat::setChoices(  const double* limits,
265                           const UBool* closures,
266                           const UnicodeString* formats,
267                           int32_t cnt )
268{
269    UErrorCode errorCode = U_ZERO_ERROR;
270    setChoices(limits, closures, formats, cnt, errorCode);
271}
272
273void
274ChoiceFormat::setChoices(const double* limits,
275                         const UBool* closures,
276                         const UnicodeString* formats,
277                         int32_t count,
278                         UErrorCode &errorCode) {
279    if (U_FAILURE(errorCode)) {
280        return;
281    }
282    if (limits == NULL || formats == NULL) {
283        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
284        return;
285    }
286    // Reconstruct the original input pattern.
287    // Modified version of the pre-ICU 4.8 toPattern() implementation.
288    UnicodeString result;
289    for (int32_t i = 0; i < count; ++i) {
290        if (i != 0) {
291            result += VERTICAL_BAR;
292        }
293        UnicodeString buf;
294        if (uprv_isPositiveInfinity(limits[i])) {
295            result += INFINITY;
296        } else if (uprv_isNegativeInfinity(limits[i])) {
297            result += MINUS;
298            result += INFINITY;
299        } else {
300            result += dtos(limits[i], buf);
301        }
302        if (closures != NULL && closures[i]) {
303            result += LESS_THAN;
304        } else {
305            result += LESS_EQUAL;
306        }
307        // Append formats[i], using quotes if there are special
308        // characters.  Single quotes themselves must be escaped in
309        // either case.
310        const UnicodeString& text = formats[i];
311        int32_t textLength = text.length();
312        int32_t nestingLevel = 0;
313        for (int32_t j = 0; j < textLength; ++j) {
314            UChar c = text[j];
315            if (c == SINGLE_QUOTE && nestingLevel == 0) {
316                // Double each top-level apostrophe.
317                result.append(c);
318            } else if (c == VERTICAL_BAR && nestingLevel == 0) {
319                // Surround each pipe symbol with apostrophes for quoting.
320                // If the next character is an apostrophe, then that will be doubled,
321                // and although the parser will see the apostrophe pairs beginning
322                // and ending one character earlier than our doubling, the result
323                // is as desired.
324                //   | -> '|'
325                //   |' -> '|'''
326                //   |'' -> '|''''' etc.
327                result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
328                continue;  // Skip the append(c) at the end of the loop body.
329            } else if (c == LEFT_CURLY_BRACE) {
330                ++nestingLevel;
331            } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
332                --nestingLevel;
333            }
334            result.append(c);
335        }
336    }
337    // Apply the reconstructed pattern.
338    applyPattern(result, errorCode);
339}
340
341// -------------------------------------
342// Gets the limit array.
343
344const double*
345ChoiceFormat::getLimits(int32_t& cnt) const
346{
347    cnt = 0;
348    return NULL;
349}
350
351// -------------------------------------
352// Gets the closures array.
353
354const UBool*
355ChoiceFormat::getClosures(int32_t& cnt) const
356{
357    cnt = 0;
358    return NULL;
359}
360
361// -------------------------------------
362// Gets the format array.
363
364const UnicodeString*
365ChoiceFormat::getFormats(int32_t& cnt) const
366{
367    cnt = 0;
368    return NULL;
369}
370
371// -------------------------------------
372// Formats an int64 number, it's actually formatted as
373// a double.  The returned format string may differ
374// from the input number because of this.
375
376UnicodeString&
377ChoiceFormat::format(int64_t number,
378                     UnicodeString& appendTo,
379                     FieldPosition& status) const
380{
381    return format((double) number, appendTo, status);
382}
383
384// -------------------------------------
385// Formats an int32_t number, it's actually formatted as
386// a double.
387
388UnicodeString&
389ChoiceFormat::format(int32_t number,
390                     UnicodeString& appendTo,
391                     FieldPosition& status) const
392{
393    return format((double) number, appendTo, status);
394}
395
396// -------------------------------------
397// Formats a double number.
398
399UnicodeString&
400ChoiceFormat::format(double number,
401                     UnicodeString& appendTo,
402                     FieldPosition& /*pos*/) const
403{
404    if (msgPattern.countParts() == 0) {
405        // No pattern was applied, or it failed.
406        return appendTo;
407    }
408    // Get the appropriate sub-message.
409    int32_t msgStart = findSubMessage(msgPattern, 0, number);
410    if (!MessageImpl::jdkAposMode(msgPattern)) {
411        int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
412        int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
413        appendTo.append(msgPattern.getPatternString(),
414                        patternStart,
415                        msgPattern.getPatternIndex(msgLimit) - patternStart);
416        return appendTo;
417    }
418    // JDK compatibility mode: Remove SKIP_SYNTAX.
419    return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
420}
421
422int32_t
423ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
424    int32_t count = pattern.countParts();
425    int32_t msgStart;
426    // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
427    // until ARG_LIMIT or end of choice-only pattern.
428    // Ignore the first number and selector and start the loop on the first message.
429    partIndex += 2;
430    for (;;) {
431        // Skip but remember the current sub-message.
432        msgStart = partIndex;
433        partIndex = pattern.getLimitPartIndex(partIndex);
434        if (++partIndex >= count) {
435            // Reached the end of the choice-only pattern.
436            // Return with the last sub-message.
437            break;
438        }
439        const MessagePattern::Part &part = pattern.getPart(partIndex++);
440        UMessagePatternPartType type = part.getType();
441        if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
442            // Reached the end of the ChoiceFormat style.
443            // Return with the last sub-message.
444            break;
445        }
446        // part is an ARG_INT or ARG_DOUBLE
447        U_ASSERT(MessagePattern::Part::hasNumericValue(type));
448        double boundary = pattern.getNumericValue(part);
449        // Fetch the ARG_SELECTOR character.
450        int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
451        UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
452        if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
453            // The number is in the interval between the previous boundary and the current one.
454            // Return with the sub-message between them.
455            // The !(a>b) and !(a>=b) comparisons are equivalent to
456            // (a<=b) and (a<b) except they "catch" NaN.
457            break;
458        }
459    }
460    return msgStart;
461}
462
463// -------------------------------------
464// Formats an array of objects. Checks if the data type of the objects
465// to get the right value for formatting.
466
467UnicodeString&
468ChoiceFormat::format(const Formattable* objs,
469                     int32_t cnt,
470                     UnicodeString& appendTo,
471                     FieldPosition& pos,
472                     UErrorCode& status) const
473{
474    if(cnt < 0) {
475        status = U_ILLEGAL_ARGUMENT_ERROR;
476        return appendTo;
477    }
478    if (msgPattern.countParts() == 0) {
479        status = U_INVALID_STATE_ERROR;
480        return appendTo;
481    }
482
483    for (int32_t i = 0; i < cnt; i++) {
484        double objDouble = objs[i].getDouble(status);
485        if (U_SUCCESS(status)) {
486            format(objDouble, appendTo, pos);
487        }
488    }
489
490    return appendTo;
491}
492
493// -------------------------------------
494
495void
496ChoiceFormat::parse(const UnicodeString& text,
497                    Formattable& result,
498                    ParsePosition& pos) const
499{
500    result.setDouble(parseArgument(msgPattern, 0, text, pos));
501}
502
503double
504ChoiceFormat::parseArgument(
505        const MessagePattern &pattern, int32_t partIndex,
506        const UnicodeString &source, ParsePosition &pos) {
507    // find the best number (defined as the one with the longest parse)
508    int32_t start = pos.getIndex();
509    int32_t furthest = start;
510    double bestNumber = uprv_getNaN();
511    double tempNumber = 0.0;
512    int32_t count = pattern.countParts();
513    while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
514        tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
515        partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
516        int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
517        int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
518        if (len >= 0) {
519            int32_t newIndex = start + len;
520            if (newIndex > furthest) {
521                furthest = newIndex;
522                bestNumber = tempNumber;
523                if (furthest == source.length()) {
524                    break;
525                }
526            }
527        }
528        partIndex = msgLimit + 1;
529    }
530    if (furthest == start) {
531        pos.setErrorIndex(start);
532    } else {
533        pos.setIndex(furthest);
534    }
535    return bestNumber;
536}
537
538int32_t
539ChoiceFormat::matchStringUntilLimitPart(
540        const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
541        const UnicodeString &source, int32_t sourceOffset) {
542    int32_t matchingSourceLength = 0;
543    const UnicodeString &msgString = pattern.getPatternString();
544    int32_t prevIndex = pattern.getPart(partIndex).getLimit();
545    for (;;) {
546        const MessagePattern::Part &part = pattern.getPart(++partIndex);
547        if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
548            int32_t index = part.getIndex();
549            int32_t length = index - prevIndex;
550            if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
551                return -1;  // mismatch
552            }
553            matchingSourceLength += length;
554            if (partIndex == limitPartIndex) {
555                return matchingSourceLength;
556            }
557            prevIndex = part.getLimit();  // SKIP_SYNTAX
558        }
559    }
560}
561
562// -------------------------------------
563
564Format*
565ChoiceFormat::clone() const
566{
567    ChoiceFormat *aCopy = new ChoiceFormat(*this);
568    return aCopy;
569}
570
571U_NAMESPACE_END
572
573#endif /* #if !UCONFIG_NO_FORMATTING */
574
575//eof
576