1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 2015, International Business Machines
6* Corporation and others.  All Rights Reserved.
7*******************************************************************************
8* affixpatternparser.h
9*
10* created on: 2015jan06
11* created by: Travis Keep
12*/
13
14#ifndef __AFFIX_PATTERN_PARSER_H__
15#define __AFFIX_PATTERN_PARSER_H__
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_FORMATTING
20
21#include "unicode/unistr.h"
22#include "unicode/uobject.h"
23#include "pluralaffix.h"
24
25U_NAMESPACE_BEGIN
26
// Forward declarations: the declarations in this header only refer to these
// types through pointers or references, so their full definitions are not
// required here.
class PluralRules;
class FixedPrecision;
class DecimalFormatSymbols;
30
31/**
32 * A representation of the various forms of a particular currency according
33 * to some locale and usage context.
34 *
35 * Includes the symbol, ISO code form, and long form(s) of the currency name
36 * for each plural variation.
37 */
38class U_I18N_API CurrencyAffixInfo : public UMemory {
39public:
40    /**
41     * Symbol is \u00a4; ISO form is \u00a4\u00a4;
42     *  long form is \u00a4\u00a4\u00a4.
43     */
44    CurrencyAffixInfo();
45
46    const UnicodeString &getSymbol() const { return fSymbol; }
47    const UnicodeString &getISO() const { return fISO; }
48    const PluralAffix &getLong() const { return fLong; }
49    void setSymbol(const UnicodeString &symbol) {
50        fSymbol = symbol;
51        fIsDefault = FALSE;
52    }
53    void setISO(const UnicodeString &iso) {
54        fISO = iso;
55        fIsDefault = FALSE;
56    }
57    UBool
58    equals(const CurrencyAffixInfo &other) const {
59        return (fSymbol == other.fSymbol)
60                && (fISO == other.fISO)
61                && (fLong.equals(other.fLong))
62                && (fIsDefault == other.fIsDefault);
63    }
64
65    /**
66     * Intializes this instance.
67     *
68     * @param locale the locale for the currency forms.
69     * @param rules The plural rules for the locale.
70     * @param currency the null terminated, 3 character ISO code of the
71     * currency. If NULL, resets this instance as if it were just created.
72     * In this case, the first 2 parameters may be NULL as well.
73     * @param status any error returned here.
74     */
75    void set(
76            const char *locale, const PluralRules *rules,
77            const UChar *currency, UErrorCode &status);
78
79    /**
80     * Returns true if this instance is the default. That is has no real
81     * currency. For instance never initialized with set()
82     * or reset with set(NULL, NULL, NULL, status).
83     */
84    UBool isDefault() const { return fIsDefault; }
85
86    /**
87     * Adjusts the precision used for a particular currency.
88     * @param currency the null terminated, 3 character ISO code of the
89     * currency.
90     * @param usage the usage of the currency
91     * @param precision min/max fraction digits and rounding increment
92     *  adjusted.
93     * @params status any error reported here.
94     */
95    static void adjustPrecision(
96            const UChar *currency, const UCurrencyUsage usage,
97            FixedPrecision &precision, UErrorCode &status);
98
99private:
100    /**
101     * The symbol form of the currency.
102     */
103    UnicodeString fSymbol;
104
105    /**
106     * The ISO form of the currency, usually three letter abbreviation.
107     */
108    UnicodeString fISO;
109
110    /**
111     * The long forms of the currency keyed by plural variation.
112     */
113    PluralAffix fLong;
114
115    UBool fIsDefault;
116
117};
118
// Forward declaration: AffixPattern::iterator() takes and returns a reference
// to this type, whose definition appears after AffixPattern in this header.
class AffixPatternIterator;
120
121/**
122 * A locale agnostic representation of an affix pattern.
123 */
124class U_I18N_API AffixPattern : public UMemory {
125public:
126
127    /**
128     * The token types that can appear in an affix pattern.
129     */
130    enum ETokenType {
131        kLiteral,
132        kPercent,
133        kPerMill,
134        kCurrency,
135        kNegative,
136        kPositive
137    };
138
139    /**
140     * An empty affix pattern.
141     */
142    AffixPattern()
143            : tokens(), literals(), hasCurrencyToken(FALSE),
144              hasPercentToken(FALSE), hasPermillToken(FALSE),  char32Count(0) {
145    }
146
147    /**
148     * Adds a string literal to this affix pattern.
149     */
150    void addLiteral(const UChar *, int32_t start, int32_t len);
151
152    /**
153     * Adds a token to this affix pattern. t must not be kLiteral as
154     * the addLiteral() method adds literals.
155     * @param t the token type to add
156     */
157    void add(ETokenType t);
158
159    /**
160     * Adds a currency token with specific count to this affix pattern.
161     * @param count the token count. Used to distinguish between
162     *  one, two, or three currency symbols. Note that adding a currency
163     *  token with count=2 (Use ISO code) is different than adding two
164     *  currency tokens each with count=1 (two currency symbols).
165     */
166    void addCurrency(uint8_t count);
167
168    /**
169     * Makes this instance be an empty affix pattern.
170     */
171    void remove();
172
173    /**
174     * Provides an iterator over the tokens in this instance.
175     * @param result this is initialized to point just before the
176     *   first token of this instance. Caller must call nextToken()
177     *   on the iterator once it is set up to have it actually point
178     *   to the first token. This first call to nextToken() will return
179     *   FALSE if the AffixPattern being iterated over is empty.
180     * @return result
181     */
182    AffixPatternIterator &iterator(AffixPatternIterator &result) const;
183
184    /**
185     * Returns TRUE if this instance has currency tokens in it.
186     */
187    UBool usesCurrency() const {
188        return hasCurrencyToken;
189    }
190
191    UBool usesPercent() const {
192        return hasPercentToken;
193    }
194
195    UBool usesPermill() const {
196        return hasPermillToken;
197    }
198
199    /**
200     * Returns the number of code points a string of this instance
201     * would have if none of the special tokens were escaped.
202     * Used to compute the padding size.
203     */
204    int32_t countChar32() const {
205        return char32Count;
206    }
207
208    /**
209     * Appends other to this instance mutating this instance in place.
210     * @param other The pattern appended to the end of this one.
211     * @return a reference to this instance for chaining.
212     */
213    AffixPattern &append(const AffixPattern &other);
214
215    /**
216     * Converts this AffixPattern back into a user string.
217     * It is the inverse of parseUserAffixString.
218     */
219    UnicodeString &toUserString(UnicodeString &appendTo) const;
220
221    /**
222     * Converts this AffixPattern back into a string.
223     * It is the inverse of parseAffixString.
224     */
225    UnicodeString &toString(UnicodeString &appendTo) const;
226
227    /**
228     * Parses an affix pattern string appending it to an AffixPattern.
229     * Parses affix pattern strings produced from using
230     * DecimalFormatPatternParser to parse a format pattern. Affix patterns
231     * include the positive prefix and suffix and the negative prefix
232     * and suffix. This method expects affix patterns strings to be in the
233     * same format that DecimalFormatPatternParser produces. Namely special
234     * characters in the affix that correspond to a field type must be
235     * prefixed with an apostrophe ('). These special character sequences
236     * inluce minus (-), percent (%), permile (U+2030), plus (+),
237     * short currency (U+00a4), medium currency (u+00a4 * 2),
238     * long currency (u+a4 * 3), and apostrophe (')
239     * (apostrophe does not correspond to a field type but has to be escaped
240     * because it itself is the escape character).
241     * Since the expansion of these special character
242     * sequences is locale dependent, these sequences are not expanded in
243     * an AffixPattern instance.
244     * If these special characters are not prefixed with an apostrophe in
245     * the affix pattern string, then they are treated verbatim just as
246     * any other character. If an apostrophe prefixes a non special
247     * character in the affix pattern, the apostrophe is simply ignored.
248     *
249     * @param affixStr the string from DecimalFormatPatternParser
250     * @param appendTo parsed result appended here.
251     * @param status any error parsing returned here.
252     */
253    static AffixPattern &parseAffixString(
254            const UnicodeString &affixStr,
255            AffixPattern &appendTo,
256            UErrorCode &status);
257
258    /**
259     * Parses an affix pattern string appending it to an AffixPattern.
260     * Parses affix pattern strings as the user would supply them.
261     * In this function, quoting makes special characters like normal
262     * characters whereas in parseAffixString, quoting makes special
263     * characters special.
264     *
265     * @param affixStr the string from the user
266     * @param appendTo parsed result appended here.
267     * @param status any error parsing returned here.
268     */
269    static AffixPattern &parseUserAffixString(
270            const UnicodeString &affixStr,
271            AffixPattern &appendTo,
272            UErrorCode &status);
273
274    UBool equals(const AffixPattern &other) const {
275        return (tokens == other.tokens)
276                && (literals == other.literals)
277                && (hasCurrencyToken == other.hasCurrencyToken)
278                && (hasPercentToken == other.hasPercentToken)
279                && (hasPermillToken == other.hasPermillToken)
280                && (char32Count == other.char32Count);
281    }
282
283private:
284    /*
285     * Tokens stored here. Each UChar generally stands for one token. A
286     * Each token is of form 'etttttttllllllll' llllllll is the length of
287     * the token and ranges from 0-255. ttttttt is the token type and ranges
288     * from 0-127. If e is set it means this is an extendo token (to be
289     * described later). To accomodate token lengths above 255, each normal
290     * token (e=0) can be followed by 0 or more extendo tokens (e=1) with
291     * the same type. Right now only kLiteral Tokens have extendo tokens.
292     * Each extendo token provides the next 8 higher bits for the length.
293     * If a kLiteral token is followed by 2 extendo tokens then, then the
294     * llllllll of the next extendo token contains bits 8-15 of the length
295     * and the last extendo token contains bits 16-23 of the length.
296     */
297    UnicodeString tokens;
298
299    /*
300     * The characters of the kLiteral tokens are concatenated together here.
301     * The first characters go with the first kLiteral token, the next
302     * characters go with the next kLiteral token etc.
303     */
304    UnicodeString literals;
305    UBool hasCurrencyToken;
306    UBool hasPercentToken;
307    UBool hasPermillToken;
308    int32_t char32Count;
309    void add(ETokenType t, uint8_t count);
310
311};
312
313/**
314 * An iterator over the tokens in an AffixPattern instance.
315 */
316class U_I18N_API AffixPatternIterator : public UMemory {
317public:
318
319    /**
320     * Using an iterator without first calling iterator on an AffixPattern
321     * instance to initialize the iterator results in
322     * undefined behavior.
323     */
324    AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { }
325    /**
326     * Advances this iterator to the next token. Returns FALSE when there
327     * are no more tokens. Calling the other methods after nextToken()
328     * returns FALSE results in undefined behavior.
329     */
330    UBool nextToken();
331
332    /**
333     * Returns the type of token.
334     */
335    AffixPattern::ETokenType getTokenType() const;
336
337    /**
338     * For literal tokens, returns the literal string. Calling this for
339     * other token types results in undefined behavior.
340     * @param result replaced with a read-only alias to the literal string.
341     * @return result
342     */
343    UnicodeString &getLiteral(UnicodeString &result) const;
344
345    /**
346     * Returns the token length. Usually 1, but for currency tokens may
347     * be 2 for ISO code and 3 for long form.
348     */
349    int32_t getTokenLength() const;
350private:
351    int32_t nextLiteralIndex;
352    int32_t lastLiteralLength;
353    int32_t nextTokenIndex;
354    const UnicodeString *tokens;
355    const UnicodeString *literals;
356    friend class AffixPattern;
357    AffixPatternIterator(const AffixPatternIterator &);
358    AffixPatternIterator &operator=(const AffixPatternIterator &);
359};
360
361/**
362 * A locale aware class that converts locale independent AffixPattern
363 * instances into locale dependent PluralAffix instances.
364 */
365class U_I18N_API AffixPatternParser : public UMemory {
366public:
367AffixPatternParser();
368AffixPatternParser(const DecimalFormatSymbols &symbols);
369void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
370
371/**
372 * Parses affixPattern appending the result to appendTo.
373 * @param affixPattern The affix pattern.
374 * @param currencyAffixInfo contains the currency forms.
375 * @param appendTo The result of parsing affixPattern is appended here.
376 * @param status any error returned here.
377 * @return appendTo.
378 */
379PluralAffix &parse(
380        const AffixPattern &affixPattern,
381        const CurrencyAffixInfo &currencyAffixInfo,
382        PluralAffix &appendTo,
383        UErrorCode &status) const;
384
385UBool equals(const AffixPatternParser &other) const {
386    return (fPercent == other.fPercent)
387            && (fPermill == other.fPermill)
388            && (fNegative == other.fNegative)
389            && (fPositive == other.fPositive);
390}
391
392private:
393UnicodeString fPercent;
394UnicodeString fPermill;
395UnicodeString fNegative;
396UnicodeString fPositive;
397};
398
399
400U_NAMESPACE_END
401#endif /* #if !UCONFIG_NO_FORMATTING */
402#endif  // __AFFIX_PATTERN_PARSER_H__
403