// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2007-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File PLURRULE_IMPL.H
*
*******************************************************************************
*/
13
14
15#ifndef PLURRULE_IMPL
16#define PLURRULE_IMPL
17
// Internal definitions for the PluralRules implementation.
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_FORMATTING
23
24#include "unicode/format.h"
25#include "unicode/locid.h"
26#include "unicode/parseerr.h"
27#include "unicode/ures.h"
28#include "uvector.h"
29#include "hash.h"
30
// Forward declaration of the test class. It is declared at global scope,
// i.e. before U_NAMESPACE_BEGIN, so it is not a member of the ICU namespace.
// NOTE(review): nothing in this header refers to it by name; presumably it is
// kept for friend declarations elsewhere -- confirm before removing.
class PluralRulesTest;
32
33U_NAMESPACE_BEGIN
34
// Forward declarations of types that this header only uses by pointer or
// reference before (or without) defining them.
//   AndConstraint, RuleChain : defined later in this header.
//   PluralRules              : used as PluralRuleParser::parse() destination.
//   VisibleDigits            : used by a FixedDecimal constructor.
//   DigitInterval            : not referenced by any declaration in this header.
class AndConstraint;
class RuleChain;
class DigitInterval;
class PluralRules;
class VisibleDigits;
40
41static const UChar DOT             = ((UChar)0x002E);
42static const UChar SINGLE_QUOTE    = ((UChar)0x0027);
43static const UChar SLASH           = ((UChar)0x002F);
44static const UChar BACKSLASH       = ((UChar)0x005C);
45static const UChar SPACE           = ((UChar)0x0020);
46static const UChar EXCLAMATION     = ((UChar)0x0021);
47static const UChar QUOTATION_MARK  = ((UChar)0x0022);
48static const UChar NUMBER_SIGN     = ((UChar)0x0023);
49static const UChar PERCENT_SIGN    = ((UChar)0x0025);
50static const UChar ASTERISK        = ((UChar)0x002A);
51static const UChar COMMA           = ((UChar)0x002C);
52static const UChar HYPHEN          = ((UChar)0x002D);
53static const UChar U_ZERO          = ((UChar)0x0030);
54static const UChar U_ONE           = ((UChar)0x0031);
55static const UChar U_TWO           = ((UChar)0x0032);
56static const UChar U_THREE         = ((UChar)0x0033);
57static const UChar U_FOUR          = ((UChar)0x0034);
58static const UChar U_FIVE          = ((UChar)0x0035);
59static const UChar U_SIX           = ((UChar)0x0036);
60static const UChar U_SEVEN         = ((UChar)0x0037);
61static const UChar U_EIGHT         = ((UChar)0x0038);
62static const UChar U_NINE          = ((UChar)0x0039);
63static const UChar COLON           = ((UChar)0x003A);
64static const UChar SEMI_COLON      = ((UChar)0x003B);
65static const UChar EQUALS          = ((UChar)0x003D);
66static const UChar AT              = ((UChar)0x0040);
67static const UChar CAP_A           = ((UChar)0x0041);
68static const UChar CAP_B           = ((UChar)0x0042);
69static const UChar CAP_R           = ((UChar)0x0052);
70static const UChar CAP_Z           = ((UChar)0x005A);
71static const UChar LOWLINE         = ((UChar)0x005F);
72static const UChar LEFTBRACE       = ((UChar)0x007B);
73static const UChar RIGHTBRACE      = ((UChar)0x007D);
74static const UChar TILDE           = ((UChar)0x007E);
75static const UChar ELLIPSIS        = ((UChar)0x2026);
76
77static const UChar LOW_A           = ((UChar)0x0061);
78static const UChar LOW_B           = ((UChar)0x0062);
79static const UChar LOW_C           = ((UChar)0x0063);
80static const UChar LOW_D           = ((UChar)0x0064);
81static const UChar LOW_E           = ((UChar)0x0065);
82static const UChar LOW_F           = ((UChar)0x0066);
83static const UChar LOW_G           = ((UChar)0x0067);
84static const UChar LOW_H           = ((UChar)0x0068);
85static const UChar LOW_I           = ((UChar)0x0069);
86static const UChar LOW_J           = ((UChar)0x006a);
87static const UChar LOW_K           = ((UChar)0x006B);
88static const UChar LOW_L           = ((UChar)0x006C);
89static const UChar LOW_M           = ((UChar)0x006D);
90static const UChar LOW_N           = ((UChar)0x006E);
91static const UChar LOW_O           = ((UChar)0x006F);
92static const UChar LOW_P           = ((UChar)0x0070);
93static const UChar LOW_Q           = ((UChar)0x0071);
94static const UChar LOW_R           = ((UChar)0x0072);
95static const UChar LOW_S           = ((UChar)0x0073);
96static const UChar LOW_T           = ((UChar)0x0074);
97static const UChar LOW_U           = ((UChar)0x0075);
98static const UChar LOW_V           = ((UChar)0x0076);
99static const UChar LOW_W           = ((UChar)0x0077);
100static const UChar LOW_Y           = ((UChar)0x0079);
101static const UChar LOW_Z           = ((UChar)0x007A);
102
103
// 0x7fffffff is the largest value an int32_t can hold (2147483647).
// NOTE(review): judging by the name this is the sentinel upper bound for an
// open-ended plural range; the use sites are not in this header -- confirm.
static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff;
105
/**
 * Token / operand categories shared by the plural-rule code in this header.
 *
 * Used as:
 *   - the scanner state of PluralRuleParser (its `type` and `prevType` fields),
 *   - the operand selector passed to FixedDecimal::get(),
 *   - AndConstraint::digitsType.
 *
 * The enumerators have no explicit initializers, so their numeric values are
 * determined purely by declaration order (none == 0 ... tEOF == 28).
 * Do not reorder or insert in the middle without checking for code that
 * depends on those values.
 */
enum tokenType {
  none,
  tNumber,
  tComma,
  tSemiColon,
  tSpace,
  tColon,
  tAt,           // '@'
  tDot,
  tDot2,
  tEllipsis,
  tKeyword,
  tAnd,
  tOr,
  tMod,          // 'mod' or '%'
  tNot,          //  'not' only.
  tIn,           //  'in'  only.
  tEqual,        //  '='   only.
  tNotEqual,     //  '!='
  tTilde,
  tWithin,
  tIs,
  tVariableN,    // The tVariable* values name the plural operand variables
  tVariableI,    //   n, i, f, v and t respectively.
  tVariableF,
  tVariableV,
  tVariableT,
  tDecimal,
  tInteger,
  tEOF
};
137
138
139class PluralRuleParser: public UMemory {
140public:
141    PluralRuleParser();
142    virtual ~PluralRuleParser();
143
144    void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status);
145    void getNextToken(UErrorCode &status);
146    void checkSyntax(UErrorCode &status);
147    static int32_t getNumberValue(const UnicodeString &token);
148
149private:
150    static tokenType getKeyType(const UnicodeString& token, tokenType type);
151    static tokenType charType(UChar ch);
152    static UBool isValidKeyword(const UnicodeString& token);
153
154    const UnicodeString  *ruleSrc;  // The rules string.
155    int32_t        ruleIndex;       // String index in the input rules, the current parse position.
156    UnicodeString  token;           // Token most recently scanned.
157    tokenType      type;
158    tokenType      prevType;
159
160                                    // The items currently being parsed & built.
161                                    // Note: currentChain may not be the last RuleChain in the
162                                    //       list because the "other" chain is forced to the end.
163    AndConstraint *curAndConstraint;
164    RuleChain     *currentChain;
165
166    int32_t        rangeLowIdx;     // Indices in the UVector of ranges of the
167    int32_t        rangeHiIdx;      //    low and hi values currently being parsed.
168
169    enum EParseState {
170       kKeyword,
171       kExpr,
172       kValue,
173       kRangeList,
174       kSamples
175    };
176
177};
178
179/**
180 * class FixedDecimal serves to communicate the properties
181 * of a formatted number from a decimal formatter to PluralRules::select()
182 *
183 * see DecimalFormat::getFixedDecimal()
184 * @internal
185 */
186class U_I18N_API FixedDecimal: public UMemory {
187  public:
188    /**
189      * @param n   the number, e.g. 12.345
190      * @param v   The number of visible fraction digits, e.g. 3
191      * @param f   The fraction digits, e.g. 345
192      */
193    FixedDecimal(double  n, int32_t v, int64_t f);
194    FixedDecimal(double n, int32_t);
195    explicit FixedDecimal(double n);
196    explicit FixedDecimal(const VisibleDigits &n);
197    FixedDecimal();
198    FixedDecimal(const UnicodeString &s, UErrorCode &ec);
199    FixedDecimal(const FixedDecimal &other);
200
201    double get(tokenType operand) const;
202    int32_t getVisibleFractionDigitCount() const;
203
204    void init(double n, int32_t v, int64_t f);
205    void init(double n);
206    UBool quickInit(double n);  // Try a fast-path only initialization,
207                                //    return TRUE if successful.
208    void adjustForMinFractionDigits(int32_t min);
209    static int64_t getFractionalDigits(double n, int32_t v);
210    static int32_t decimals(double n);
211
212    double      source;
213    int32_t     visibleDecimalDigitCount;
214    int64_t     decimalDigits;
215    int64_t     decimalDigitsWithoutTrailingZeros;
216    int64_t     intValue;
217    UBool       hasIntegerValue;
218    UBool       isNegative;
219    UBool       isNanOrInfinity;
220};
221
222class AndConstraint : public UMemory  {
223public:
224    typedef enum RuleOp {
225        NONE,
226        MOD
227    } RuleOp;
228    RuleOp  op;
229    int32_t opNum;           // for mod expressions, the right operand of the mod.
230    int32_t     value;       // valid for 'is' rules only.
231    UVector32   *rangeList;  // for 'in', 'within' rules. Null otherwise.
232    UBool   negated;           // TRUE for negated rules.
233    UBool   integerOnly;     // TRUE for 'within' rules.
234    tokenType digitsType;    // n | i | v | f constraint.
235    AndConstraint *next;
236
237    AndConstraint();
238    AndConstraint(const AndConstraint& other);
239    virtual ~AndConstraint();
240    AndConstraint* add();
241    // UBool isFulfilled(double number);
242    UBool isFulfilled(const FixedDecimal &number);
243};
244
245class OrConstraint : public UMemory  {
246public:
247    AndConstraint *childNode;
248    OrConstraint *next;
249    OrConstraint();
250
251    OrConstraint(const OrConstraint& other);
252    virtual ~OrConstraint();
253    AndConstraint* add();
254    // UBool isFulfilled(double number);
255    UBool isFulfilled(const FixedDecimal &number);
256};
257
258class RuleChain : public UMemory  {
259public:
260    UnicodeString   fKeyword;
261    RuleChain      *fNext;
262    OrConstraint   *ruleHeader;
263    UnicodeString   fDecimalSamples;  // Samples strings from rule source
264    UnicodeString   fIntegerSamples;  //   without @decimal or @integer, otherwise unprocessed.
265    UBool           fDecimalSamplesUnbounded;
266    UBool           fIntegerSamplesUnbounded;
267
268
269    RuleChain();
270    RuleChain(const RuleChain& other);
271    virtual ~RuleChain();
272
273    UnicodeString select(const FixedDecimal &number) const;
274    void          dumpRules(UnicodeString& result);
275    UErrorCode    getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const;
276    UBool         isKeyword(const UnicodeString& keyword) const;
277};
278
279class PluralKeywordEnumeration : public StringEnumeration {
280public:
281    PluralKeywordEnumeration(RuleChain *header, UErrorCode& status);
282    virtual ~PluralKeywordEnumeration();
283    static UClassID U_EXPORT2 getStaticClassID(void);
284    virtual UClassID getDynamicClassID(void) const;
285    virtual const UnicodeString* snext(UErrorCode& status);
286    virtual void reset(UErrorCode& status);
287    virtual int32_t count(UErrorCode& status) const;
288private:
289    int32_t         pos;
290    UVector         fKeywordNames;
291};
292
293
294class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration {
295  public:
296    PluralAvailableLocalesEnumeration(UErrorCode &status);
297    virtual ~PluralAvailableLocalesEnumeration();
298    virtual const char* next(int32_t *resultLength, UErrorCode& status);
299    virtual void reset(UErrorCode& status);
300    virtual int32_t count(UErrorCode& status) const;
301  private:
302    UErrorCode      fOpenStatus;
303    UResourceBundle *fLocales;
304    UResourceBundle *fRes;
305};
306
307U_NAMESPACE_END
308
309#endif /* #if !UCONFIG_NO_FORMATTING */
310
#endif // PLURRULE_IMPL
//eof
313