1/*
2 * Copyright (C) 2015, International Business Machines
3 * Corporation and others.  All Rights Reserved.
4 *
5 * file name: affixpatternparser.cpp
6 */
7
8#include "unicode/utypes.h"
9
10#if !UCONFIG_NO_FORMATTING
11
12#include "unicode/dcfmtsym.h"
13#include "unicode/plurrule.h"
14#include "unicode/ucurr.h"
15#include "affixpatternparser.h"
16#include "charstr.h"
17#include "precision.h"
18#include "uassert.h"
19#include "unistrappender.h"
20
21        static UChar gDefaultSymbols[] = {0xa4, 0xa4, 0xa4};
22
23static UChar gPercent = 0x25;
24static UChar gPerMill = 0x2030;
25static UChar gNegative = 0x2D;
26static UChar gPositive = 0x2B;
27
// Packs a token type (shifted into the upper byte) and a length (lower byte)
// into a single UChar. Both arguments are fully parenthesized so that
// expression arguments such as `a | b` are masked as a whole.
#define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) | ((l) & 0xFF)))

// Extracts the token type: bits 8-14 of the packed unit.
#define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F))

// Non-zero when bit 15 of the packed unit is set. addLiteral sets this bit
// on every length unit after the first one of a literal.
#define UNPACK_LONG(c) (((c) >> 8) & 0x80)

// Extracts the low byte (length / count) of the packed unit.
#define UNPACK_LENGTH(c) ((c) & 0xFF)
35
36U_NAMESPACE_BEGIN
37
38static int32_t
39nextToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) {
40    if (buffer[idx] != 0x27 || idx + 1 == len) {
41        *token = buffer[idx];
42        return 1;
43    }
44    *token = buffer[idx + 1];
45    if (buffer[idx + 1] == 0xA4) {
46        int32_t i = 2;
47        for (; idx + i < len && i < 4 && buffer[idx + i] == buffer[idx + 1]; ++i);
48        return i;
49    }
50    return 2;
51}
52
53static int32_t
54nextUserToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) {
55    *token = buffer[idx];
56    int32_t max;
57    switch (buffer[idx]) {
58    case 0x27:
59        max = 2;
60        break;
61    case 0xA4:
62        max = 3;
63        break;
64    default:
65        max = 1;
66        break;
67    }
68    int32_t i = 1;
69    for (; idx + i < len && i < max && buffer[idx + i] == buffer[idx]; ++i);
70    return i;
71}
72
73CurrencyAffixInfo::CurrencyAffixInfo()
74        : fSymbol(gDefaultSymbols, 1),
75          fISO(gDefaultSymbols, 2),
76          fLong(DigitAffix(gDefaultSymbols, 3)),
77          fIsDefault(TRUE) {
78}
79
80void
81CurrencyAffixInfo::set(
82        const char *locale,
83        const PluralRules *rules,
84        const UChar *currency,
85        UErrorCode &status) {
86    if (U_FAILURE(status)) {
87        return;
88    }
89    fIsDefault = FALSE;
90    if (currency == NULL) {
91        fSymbol.setTo(gDefaultSymbols, 1);
92        fISO.setTo(gDefaultSymbols, 2);
93        fLong.remove();
94        fLong.append(gDefaultSymbols, 3);
95        fIsDefault = TRUE;
96        return;
97    }
98    int32_t len;
99    UBool unusedIsChoice;
100    const UChar *symbol = ucurr_getName(
101            currency, locale, UCURR_SYMBOL_NAME, &unusedIsChoice,
102            &len, &status);
103    if (U_FAILURE(status)) {
104        return;
105    }
106    fSymbol.setTo(symbol, len);
107    fISO.setTo(currency, u_strlen(currency));
108    fLong.remove();
109    StringEnumeration* keywords = rules->getKeywords(status);
110    if (U_FAILURE(status)) {
111        return;
112    }
113    const UnicodeString* pluralCount;
114    while ((pluralCount = keywords->snext(status)) != NULL) {
115        CharString pCount;
116        pCount.appendInvariantChars(*pluralCount, status);
117        const UChar *pluralName = ucurr_getPluralName(
118            currency, locale, &unusedIsChoice, pCount.data(),
119            &len, &status);
120        fLong.setVariant(pCount.data(), UnicodeString(pluralName, len), status);
121    }
122    delete keywords;
123}
124
125void
126CurrencyAffixInfo::adjustPrecision(
127        const UChar *currency, const UCurrencyUsage usage,
128        FixedPrecision &precision, UErrorCode &status) {
129    if (U_FAILURE(status)) {
130        return;
131    }
132
133    int32_t digitCount = ucurr_getDefaultFractionDigitsForUsage(
134            currency, usage, &status);
135    precision.fMin.setFracDigitCount(digitCount);
136    precision.fMax.setFracDigitCount(digitCount);
137    double increment = ucurr_getRoundingIncrementForUsage(
138            currency, usage, &status);
139    if (increment == 0.0) {
140        precision.fRoundingIncrement.clear();
141    } else {
142        precision.fRoundingIncrement.set(increment);
143        // guard against round-off error
144        precision.fRoundingIncrement.round(6);
145    }
146}
147
148void
149AffixPattern::addLiteral(
150        const UChar *literal, int32_t start, int32_t len) {
151    char32Count += u_countChar32(literal + start, len);
152    literals.append(literal, start, len);
153    int32_t tlen = tokens.length();
154    // Takes 4 UChars to encode maximum literal length.
155    UChar *tokenChars = tokens.getBuffer(tlen + 4);
156
157    // find start of literal size. May be tlen if there is no literal.
158    // While finding start of literal size, compute literal length
159    int32_t literalLength = 0;
160    int32_t tLiteralStart = tlen;
161    while (tLiteralStart > 0 && UNPACK_TOKEN(tokenChars[tLiteralStart - 1]) == kLiteral) {
162        tLiteralStart--;
163        literalLength <<= 8;
164        literalLength |= UNPACK_LENGTH(tokenChars[tLiteralStart]);
165    }
166    // Add number of chars we just added to literal
167    literalLength += len;
168
169    // Now encode the new length starting at tLiteralStart
170    tlen = tLiteralStart;
171    tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral, literalLength & 0xFF);
172    literalLength >>= 8;
173    while (literalLength) {
174        tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral | 0x80, literalLength & 0xFF);
175        literalLength >>= 8;
176    }
177    tokens.releaseBuffer(tlen);
178}
179
180void
181AffixPattern::add(ETokenType t) {
182    add(t, 1);
183}
184
185void
186AffixPattern::addCurrency(uint8_t count) {
187    add(kCurrency, count);
188}
189
190void
191AffixPattern::add(ETokenType t, uint8_t count) {
192    U_ASSERT(t != kLiteral);
193    char32Count += count;
194    switch (t) {
195    case kCurrency:
196        hasCurrencyToken = TRUE;
197        break;
198    case kPercent:
199        hasPercentToken = TRUE;
200        break;
201    case kPerMill:
202        hasPermillToken = TRUE;
203        break;
204    default:
205        // Do nothing
206        break;
207    }
208    tokens.append(PACK_TOKEN_AND_LENGTH(t, count));
209}
210
211AffixPattern &
212AffixPattern::append(const AffixPattern &other) {
213    AffixPatternIterator iter;
214    other.iterator(iter);
215    UnicodeString literal;
216    while (iter.nextToken()) {
217        switch (iter.getTokenType()) {
218        case kLiteral:
219            iter.getLiteral(literal);
220            addLiteral(literal.getBuffer(), 0, literal.length());
221            break;
222        case kCurrency:
223            addCurrency(iter.getTokenLength());
224            break;
225        default:
226            add(iter.getTokenType());
227            break;
228        }
229    }
230    return *this;
231}
232
233void
234AffixPattern::remove() {
235    tokens.remove();
236    literals.remove();
237    hasCurrencyToken = FALSE;
238    hasPercentToken = FALSE;
239    hasPermillToken = FALSE;
240    char32Count = 0;
241}
242
243// escapes literals for strings where special characters are NOT escaped
244// except for apostrophe.
245static void escapeApostropheInLiteral(
246        const UnicodeString &literal, UnicodeStringAppender &appender) {
247    int32_t len = literal.length();
248    const UChar *buffer = literal.getBuffer();
249    for (int32_t i = 0; i < len; ++i) {
250        UChar ch = buffer[i];
251        switch (ch) {
252            case 0x27:
253                appender.append((UChar) 0x27);
254                appender.append((UChar) 0x27);
255                break;
256            default:
257                appender.append(ch);
258                break;
259        }
260    }
261}
262
263
264// escapes literals for user strings where special characters in literals
265// are escaped with apostrophe.
266static void escapeLiteral(
267        const UnicodeString &literal, UnicodeStringAppender &appender) {
268    int32_t len = literal.length();
269    const UChar *buffer = literal.getBuffer();
270    for (int32_t i = 0; i < len; ++i) {
271        UChar ch = buffer[i];
272        switch (ch) {
273            case 0x27:
274                appender.append((UChar) 0x27);
275                appender.append((UChar) 0x27);
276                break;
277            case 0x25:
278                appender.append((UChar) 0x27);
279                appender.append((UChar) 0x25);
280                appender.append((UChar) 0x27);
281                break;
282            case 0x2030:
283                appender.append((UChar) 0x27);
284                appender.append((UChar) 0x2030);
285                appender.append((UChar) 0x27);
286                break;
287            case 0xA4:
288                appender.append((UChar) 0x27);
289                appender.append((UChar) 0xA4);
290                appender.append((UChar) 0x27);
291                break;
292            case 0x2D:
293                appender.append((UChar) 0x27);
294                appender.append((UChar) 0x2D);
295                appender.append((UChar) 0x27);
296                break;
297            case 0x2B:
298                appender.append((UChar) 0x27);
299                appender.append((UChar) 0x2B);
300                appender.append((UChar) 0x27);
301                break;
302            default:
303                appender.append(ch);
304                break;
305        }
306    }
307}
308
309UnicodeString &
310AffixPattern::toString(UnicodeString &appendTo) const {
311    AffixPatternIterator iter;
312    iterator(iter);
313    UnicodeStringAppender appender(appendTo);
314    UnicodeString literal;
315    while (iter.nextToken()) {
316        switch (iter.getTokenType()) {
317        case kLiteral:
318            escapeApostropheInLiteral(iter.getLiteral(literal), appender);
319            break;
320        case kPercent:
321            appender.append((UChar) 0x27);
322            appender.append((UChar) 0x25);
323            break;
324        case kPerMill:
325            appender.append((UChar) 0x27);
326            appender.append((UChar) 0x2030);
327            break;
328        case kCurrency:
329            {
330                appender.append((UChar) 0x27);
331                int32_t cl = iter.getTokenLength();
332                for (int32_t i = 0; i < cl; ++i) {
333                    appender.append((UChar) 0xA4);
334                }
335            }
336            break;
337        case kNegative:
338            appender.append((UChar) 0x27);
339            appender.append((UChar) 0x2D);
340            break;
341        case kPositive:
342            appender.append((UChar) 0x27);
343            appender.append((UChar) 0x2B);
344            break;
345        default:
346            U_ASSERT(FALSE);
347            break;
348        }
349    }
350    return appendTo;
351}
352
353UnicodeString &
354AffixPattern::toUserString(UnicodeString &appendTo) const {
355    AffixPatternIterator iter;
356    iterator(iter);
357    UnicodeStringAppender appender(appendTo);
358    UnicodeString literal;
359    while (iter.nextToken()) {
360        switch (iter.getTokenType()) {
361        case kLiteral:
362            escapeLiteral(iter.getLiteral(literal), appender);
363            break;
364        case kPercent:
365            appender.append((UChar) 0x25);
366            break;
367        case kPerMill:
368            appender.append((UChar) 0x2030);
369            break;
370        case kCurrency:
371            {
372                int32_t cl = iter.getTokenLength();
373                for (int32_t i = 0; i < cl; ++i) {
374                    appender.append((UChar) 0xA4);
375                }
376            }
377            break;
378        case kNegative:
379            appender.append((UChar) 0x2D);
380            break;
381        case kPositive:
382            appender.append((UChar) 0x2B);
383            break;
384        default:
385            U_ASSERT(FALSE);
386            break;
387        }
388    }
389    return appendTo;
390}
391
392class AffixPatternAppender : public UMemory {
393public:
394    AffixPatternAppender(AffixPattern &dest) : fDest(&dest), fIdx(0) { }
395
396    inline void append(UChar x) {
397        if (fIdx == UPRV_LENGTHOF(fBuffer)) {
398            fDest->addLiteral(fBuffer, 0, fIdx);
399            fIdx = 0;
400        }
401        fBuffer[fIdx++] = x;
402    }
403
404    inline void append(UChar32 x) {
405        if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) {
406            fDest->addLiteral(fBuffer, 0, fIdx);
407            fIdx = 0;
408        }
409        U16_APPEND_UNSAFE(fBuffer, fIdx, x);
410    }
411
412    inline void flush() {
413        if (fIdx) {
414            fDest->addLiteral(fBuffer, 0, fIdx);
415        }
416        fIdx = 0;
417    }
418
419    /**
420     * flush the buffer when we go out of scope.
421     */
422    ~AffixPatternAppender() {
423        flush();
424    }
425private:
426    AffixPattern *fDest;
427    int32_t fIdx;
428    UChar fBuffer[32];
429    AffixPatternAppender(const AffixPatternAppender &other);
430    AffixPatternAppender &operator=(const AffixPatternAppender &other);
431};
432
433
434AffixPattern &
435AffixPattern::parseUserAffixString(
436        const UnicodeString &affixStr,
437        AffixPattern &appendTo,
438        UErrorCode &status) {
439    if (U_FAILURE(status)) {
440        return appendTo;
441    }
442    int32_t len = affixStr.length();
443    const UChar *buffer = affixStr.getBuffer();
444    // 0 = not quoted; 1 = quoted.
445    int32_t state = 0;
446    AffixPatternAppender appender(appendTo);
447    for (int32_t i = 0; i < len; ) {
448        UChar token;
449        int32_t tokenSize = nextUserToken(buffer, i, len, &token);
450        i += tokenSize;
451        if (token == 0x27 && tokenSize == 1) { // quote
452            state = 1 - state;
453            continue;
454        }
455        if (state == 0) {
456            switch (token) {
457            case 0x25:
458                appender.flush();
459                appendTo.add(kPercent, 1);
460                break;
461            case 0x27:  // double quote
462                appender.append((UChar) 0x27);
463                break;
464            case 0x2030:
465                appender.flush();
466                appendTo.add(kPerMill, 1);
467                break;
468            case 0x2D:
469                appender.flush();
470                appendTo.add(kNegative, 1);
471                break;
472            case 0x2B:
473                appender.flush();
474                appendTo.add(kPositive, 1);
475                break;
476            case 0xA4:
477                appender.flush();
478                appendTo.add(kCurrency, tokenSize);
479                break;
480            default:
481                appender.append(token);
482                break;
483            }
484        } else {
485            switch (token) {
486            case 0x27:  // double quote
487                appender.append((UChar) 0x27);
488                break;
489            case 0xA4: // included b/c tokenSize can be > 1
490                for (int32_t j = 0; j < tokenSize; ++j) {
491                    appender.append((UChar) 0xA4);
492                }
493                break;
494            default:
495                appender.append(token);
496                break;
497            }
498        }
499    }
500    return appendTo;
501}
502
503AffixPattern &
504AffixPattern::parseAffixString(
505        const UnicodeString &affixStr,
506        AffixPattern &appendTo,
507        UErrorCode &status) {
508    if (U_FAILURE(status)) {
509        return appendTo;
510    }
511    int32_t len = affixStr.length();
512    const UChar *buffer = affixStr.getBuffer();
513    for (int32_t i = 0; i < len; ) {
514        UChar token;
515        int32_t tokenSize = nextToken(buffer, i, len, &token);
516        if (tokenSize == 1) {
517            int32_t literalStart = i;
518            ++i;
519            while (i < len && (tokenSize = nextToken(buffer, i, len, &token)) == 1) {
520                ++i;
521            }
522            appendTo.addLiteral(buffer, literalStart, i - literalStart);
523
524            // If we reached end of string, we are done
525            if (i == len) {
526                return appendTo;
527            }
528        }
529        i += tokenSize;
530        switch (token) {
531        case 0x25:
532            appendTo.add(kPercent, 1);
533            break;
534        case 0x2030:
535            appendTo.add(kPerMill, 1);
536            break;
537        case 0x2D:
538            appendTo.add(kNegative, 1);
539            break;
540        case 0x2B:
541            appendTo.add(kPositive, 1);
542            break;
543        case 0xA4:
544            {
545                if (tokenSize - 1 > 3) {
546                    status = U_PARSE_ERROR;
547                    return appendTo;
548                }
549                appendTo.add(kCurrency, tokenSize - 1);
550            }
551            break;
552        default:
553            appendTo.addLiteral(&token, 0, 1);
554            break;
555        }
556    }
557    return appendTo;
558}
559
560AffixPatternIterator &
561AffixPattern::iterator(AffixPatternIterator &result) const {
562    result.nextLiteralIndex = 0;
563    result.lastLiteralLength = 0;
564    result.nextTokenIndex = 0;
565    result.tokens = &tokens;
566    result.literals = &literals;
567    return result;
568}
569
570UBool
571AffixPatternIterator::nextToken() {
572    int32_t tlen = tokens->length();
573    if (nextTokenIndex == tlen) {
574        return FALSE;
575    }
576    ++nextTokenIndex;
577    const UChar *tokenBuffer = tokens->getBuffer();
578    if (UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]) ==
579            AffixPattern::kLiteral) {
580        while (nextTokenIndex < tlen &&
581                UNPACK_LONG(tokenBuffer[nextTokenIndex])) {
582            ++nextTokenIndex;
583        }
584        lastLiteralLength = 0;
585        int32_t i = nextTokenIndex - 1;
586        for (; UNPACK_LONG(tokenBuffer[i]); --i) {
587            lastLiteralLength <<= 8;
588            lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]);
589        }
590        lastLiteralLength <<= 8;
591        lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]);
592        nextLiteralIndex += lastLiteralLength;
593    }
594    return TRUE;
595}
596
597AffixPattern::ETokenType
598AffixPatternIterator::getTokenType() const {
599    return UNPACK_TOKEN(tokens->charAt(nextTokenIndex - 1));
600}
601
602UnicodeString &
603AffixPatternIterator::getLiteral(UnicodeString &result) const {
604    const UChar *buffer = literals->getBuffer();
605    result.setTo(buffer + (nextLiteralIndex - lastLiteralLength), lastLiteralLength);
606    return result;
607}
608
609int32_t
610AffixPatternIterator::getTokenLength() const {
611    const UChar *tokenBuffer = tokens->getBuffer();
612    AffixPattern::ETokenType type = UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]);
613    return type == AffixPattern::kLiteral ? lastLiteralLength : UNPACK_LENGTH(tokenBuffer[nextTokenIndex - 1]);
614}
615
616AffixPatternParser::AffixPatternParser()
617        : fPercent(gPercent), fPermill(gPerMill), fNegative(gNegative), fPositive(gPositive) {
618}
619
620AffixPatternParser::AffixPatternParser(
621        const DecimalFormatSymbols &symbols) {
622    setDecimalFormatSymbols(symbols);
623}
624
625void
626AffixPatternParser::setDecimalFormatSymbols(
627        const DecimalFormatSymbols &symbols) {
628    fPercent = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);
629    fPermill = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);
630    fNegative = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
631    fPositive = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
632}
633
634PluralAffix &
635AffixPatternParser::parse(
636        const AffixPattern &affixPattern,
637        const CurrencyAffixInfo &currencyAffixInfo,
638        PluralAffix &appendTo,
639        UErrorCode &status) const {
640    if (U_FAILURE(status)) {
641        return appendTo;
642    }
643    AffixPatternIterator iter;
644    affixPattern.iterator(iter);
645    UnicodeString literal;
646    while (iter.nextToken()) {
647        switch (iter.getTokenType()) {
648        case AffixPattern::kPercent:
649            appendTo.append(fPercent, UNUM_PERCENT_FIELD);
650            break;
651        case AffixPattern::kPerMill:
652            appendTo.append(fPermill, UNUM_PERMILL_FIELD);
653            break;
654        case AffixPattern::kNegative:
655            appendTo.append(fNegative, UNUM_SIGN_FIELD);
656            break;
657        case AffixPattern::kPositive:
658            appendTo.append(fPositive, UNUM_SIGN_FIELD);
659            break;
660        case AffixPattern::kCurrency:
661            switch (iter.getTokenLength()) {
662                case 1:
663                    appendTo.append(
664                            currencyAffixInfo.getSymbol(), UNUM_CURRENCY_FIELD);
665                    break;
666                case 2:
667                    appendTo.append(
668                            currencyAffixInfo.getISO(), UNUM_CURRENCY_FIELD);
669                    break;
670                case 3:
671                    appendTo.append(
672                            currencyAffixInfo.getLong(), UNUM_CURRENCY_FIELD, status);
673                    break;
674                default:
675                    U_ASSERT(FALSE);
676                    break;
677            }
678            break;
679        case AffixPattern::kLiteral:
680            appendTo.append(iter.getLiteral(literal));
681            break;
682        default:
683            U_ASSERT(FALSE);
684            break;
685        }
686    }
687    return appendTo;
688}
689
690
691U_NAMESPACE_END
692#endif /* #if !UCONFIG_NO_FORMATTING */
693