1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 2007-2016, International Business Machines Corporation and
6* others. All Rights Reserved.
7*******************************************************************************
8*
9* File PLURRULE_IMPL.H
10*
11*******************************************************************************
12*/
13
14
15#ifndef PLURRULE_IMPL
16#define PLURRULE_IMPL
17
18// Internal definitions for the PluralRules implementation.
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_FORMATTING
23
24#include "unicode/format.h"
25#include "unicode/locid.h"
26#include "unicode/parseerr.h"
27#include "unicode/strenum.h"
28#include "unicode/ures.h"
29#include "uvector.h"
30#include "hash.h"
31#include "uassert.h"
32
33class PluralRulesTest;
34
35U_NAMESPACE_BEGIN
36
37class AndConstraint;
38class RuleChain;
39class DigitInterval;
40class PluralRules;
41class VisibleDigits;
42
43static const UChar DOT             = ((UChar)0x002E);
44static const UChar SINGLE_QUOTE    = ((UChar)0x0027);
45static const UChar SLASH           = ((UChar)0x002F);
46static const UChar BACKSLASH       = ((UChar)0x005C);
47static const UChar SPACE           = ((UChar)0x0020);
48static const UChar EXCLAMATION     = ((UChar)0x0021);
49static const UChar QUOTATION_MARK  = ((UChar)0x0022);
50static const UChar NUMBER_SIGN     = ((UChar)0x0023);
51static const UChar PERCENT_SIGN    = ((UChar)0x0025);
52static const UChar ASTERISK        = ((UChar)0x002A);
53static const UChar COMMA           = ((UChar)0x002C);
54static const UChar HYPHEN          = ((UChar)0x002D);
55static const UChar U_ZERO          = ((UChar)0x0030);
56static const UChar U_ONE           = ((UChar)0x0031);
57static const UChar U_TWO           = ((UChar)0x0032);
58static const UChar U_THREE         = ((UChar)0x0033);
59static const UChar U_FOUR          = ((UChar)0x0034);
60static const UChar U_FIVE          = ((UChar)0x0035);
61static const UChar U_SIX           = ((UChar)0x0036);
62static const UChar U_SEVEN         = ((UChar)0x0037);
63static const UChar U_EIGHT         = ((UChar)0x0038);
64static const UChar U_NINE          = ((UChar)0x0039);
65static const UChar COLON           = ((UChar)0x003A);
66static const UChar SEMI_COLON      = ((UChar)0x003B);
67static const UChar EQUALS          = ((UChar)0x003D);
68static const UChar AT              = ((UChar)0x0040);
69static const UChar CAP_A           = ((UChar)0x0041);
70static const UChar CAP_B           = ((UChar)0x0042);
71static const UChar CAP_R           = ((UChar)0x0052);
72static const UChar CAP_Z           = ((UChar)0x005A);
73static const UChar LOWLINE         = ((UChar)0x005F);
74static const UChar LEFTBRACE       = ((UChar)0x007B);
75static const UChar RIGHTBRACE      = ((UChar)0x007D);
76static const UChar TILDE           = ((UChar)0x007E);
77static const UChar ELLIPSIS        = ((UChar)0x2026);
78
79static const UChar LOW_A           = ((UChar)0x0061);
80static const UChar LOW_B           = ((UChar)0x0062);
81static const UChar LOW_C           = ((UChar)0x0063);
82static const UChar LOW_D           = ((UChar)0x0064);
83static const UChar LOW_E           = ((UChar)0x0065);
84static const UChar LOW_F           = ((UChar)0x0066);
85static const UChar LOW_G           = ((UChar)0x0067);
86static const UChar LOW_H           = ((UChar)0x0068);
87static const UChar LOW_I           = ((UChar)0x0069);
88static const UChar LOW_J           = ((UChar)0x006a);
89static const UChar LOW_K           = ((UChar)0x006B);
90static const UChar LOW_L           = ((UChar)0x006C);
91static const UChar LOW_M           = ((UChar)0x006D);
92static const UChar LOW_N           = ((UChar)0x006E);
93static const UChar LOW_O           = ((UChar)0x006F);
94static const UChar LOW_P           = ((UChar)0x0070);
95static const UChar LOW_Q           = ((UChar)0x0071);
96static const UChar LOW_R           = ((UChar)0x0072);
97static const UChar LOW_S           = ((UChar)0x0073);
98static const UChar LOW_T           = ((UChar)0x0074);
99static const UChar LOW_U           = ((UChar)0x0075);
100static const UChar LOW_V           = ((UChar)0x0076);
101static const UChar LOW_W           = ((UChar)0x0077);
102static const UChar LOW_Y           = ((UChar)0x0079);
103static const UChar LOW_Z           = ((UChar)0x007A);
104
105
// Largest value representable in an int32_t (2^31 - 1).
// NOTE(review): presumably used as the open-ended upper bound of a plural
// range -- confirm against the uses in plurrule.cpp.
static const int32_t PLURAL_RANGE_HIGH = 0x7FFFFFFF;
107
/**
 * Kinds of token distinguished while processing a plural rule string
 * (PluralRuleParser keeps the kind of its current and previous token in
 * members of this type). Enumerators are numbered consecutively from 0.
 */
enum tokenType {
  none = 0,
  tNumber,
  tComma,
  tSemiColon,
  tSpace,
  tColon,
  tAt,           // '@'
  tDot,
  tDot2,
  tEllipsis,
  tKeyword,
  tAnd,
  tOr,
  tMod,          // 'mod' or '%'
  tNot,          // 'not' only.
  tIn,           // 'in'  only.
  tEqual,        // '='   only.
  tNotEqual,     // '!='
  tTilde,
  tWithin,
  tIs,
  // Operand variables. NOTE(review): presumably these are the values that
  // tokenTypeToPluralOperand() accepts -- confirm in plurrule.cpp.
  tVariableN,
  tVariableI,
  tVariableF,
  tVariableV,
  tVariableT,
  tDecimal,
  tInteger,
  tEOF
};
139
140
141class PluralRuleParser: public UMemory {
142public:
143    PluralRuleParser();
144    virtual ~PluralRuleParser();
145
146    void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status);
147    void getNextToken(UErrorCode &status);
148    void checkSyntax(UErrorCode &status);
149    static int32_t getNumberValue(const UnicodeString &token);
150
151private:
152    static tokenType getKeyType(const UnicodeString& token, tokenType type);
153    static tokenType charType(UChar ch);
154    static UBool isValidKeyword(const UnicodeString& token);
155
156    const UnicodeString  *ruleSrc;  // The rules string.
157    int32_t        ruleIndex;       // String index in the input rules, the current parse position.
158    UnicodeString  token;           // Token most recently scanned.
159    tokenType      type;
160    tokenType      prevType;
161
162                                    // The items currently being parsed & built.
163                                    // Note: currentChain may not be the last RuleChain in the
164                                    //       list because the "other" chain is forced to the end.
165    AndConstraint *curAndConstraint;
166    RuleChain     *currentChain;
167
168    int32_t        rangeLowIdx;     // Indices in the UVector of ranges of the
169    int32_t        rangeHiIdx;      //    low and hi values currently being parsed.
170
171    enum EParseState {
172       kKeyword,
173       kExpr,
174       kValue,
175       kRangeList,
176       kSamples
177    };
178
179};
180
/**
 * The operands of a number that plural rules can refer to.
 * Enumerators are numbered consecutively from 0.
 */
enum PluralOperand {
    /** n: the double value of the entire number. */
    PLURAL_OPERAND_N = 0,

    /** i: the integer value, with the fraction digits truncated off. */
    PLURAL_OPERAND_I,

    /** f: all visible fraction digits as an integer, including trailing zeros. */
    PLURAL_OPERAND_F,

    /** t: visible fraction digits as an integer, not including trailing zeros. */
    PLURAL_OPERAND_T,

    /** v: number of visible fraction digits. */
    PLURAL_OPERAND_V,

    /** w: number of visible fraction digits, not including trailing zeros. */
    PLURAL_OPERAND_W,

    /**
     * j: THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC.
     *
     * <p>Yields the integer value, but fails if the number has fraction
     * digits; using "j" instead of "i" is like implicitly adding "v is 0".
     *
     * <p>Example: "j is 3" is equivalent to "i is 3 and v is 0" -- it
     * matches "3" but not "3.1" or "3.0".
     */
    PLURAL_OPERAND_J
};
223
224/**
225 * Converts from the tokenType enum to PluralOperand. Asserts that the given
226 * tokenType can be mapped to a PluralOperand.
227 */
228PluralOperand tokenTypeToPluralOperand(tokenType tt);
229
230/**
231 * An interface to FixedDecimal, allowing for other implementations.
232 * @internal
233 */
234class U_I18N_API IFixedDecimal {
235  public:
236    virtual ~IFixedDecimal();
237
238    /**
239     * Returns the value corresponding to the specified operand (n, i, f, t, v, or w).
240     * If the operand is 'n', returns a double; otherwise, returns an integer.
241     */
242    virtual double getPluralOperand(PluralOperand operand) const = 0;
243
244    virtual bool isNaN() const = 0;
245
246    virtual bool isInfinite() const = 0;
247};
248
249/**
250 * class FixedDecimal serves to communicate the properties
251 * of a formatted number from a decimal formatter to PluralRules::select()
252 *
253 * see DecimalFormat::getFixedDecimal()
254 * @internal
255 */
256class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject {
257  public:
258    /**
259      * @param n   the number, e.g. 12.345
260      * @param v   The number of visible fraction digits, e.g. 3
261      * @param f   The fraction digits, e.g. 345
262      */
263    FixedDecimal(double  n, int32_t v, int64_t f);
264    FixedDecimal(double n, int32_t);
265    explicit FixedDecimal(double n);
266    explicit FixedDecimal(const VisibleDigits &n);
267    FixedDecimal();
268    ~FixedDecimal() U_OVERRIDE;
269    FixedDecimal(const UnicodeString &s, UErrorCode &ec);
270    FixedDecimal(const FixedDecimal &other);
271
272    double getPluralOperand(PluralOperand operand) const U_OVERRIDE;
273    bool isNaN() const U_OVERRIDE;
274    bool isInfinite() const U_OVERRIDE;
275
276    bool isNanOrInfinity() const;  // used in decimfmtimpl.cpp
277
278    int32_t getVisibleFractionDigitCount() const;
279
280    void init(double n, int32_t v, int64_t f);
281    void init(double n);
282    UBool quickInit(double n);  // Try a fast-path only initialization,
283                                //    return TRUE if successful.
284    void adjustForMinFractionDigits(int32_t min);
285    static int64_t getFractionalDigits(double n, int32_t v);
286    static int32_t decimals(double n);
287
288    double      source;
289    int32_t     visibleDecimalDigitCount;
290    int64_t     decimalDigits;
291    int64_t     decimalDigitsWithoutTrailingZeros;
292    int64_t     intValue;
293    UBool       hasIntegerValue;
294    UBool       isNegative;
295    UBool       _isNaN;
296    UBool       _isInfinite;
297};
298
299class AndConstraint : public UMemory  {
300public:
301    typedef enum RuleOp {
302        NONE,
303        MOD
304    } RuleOp;
305    RuleOp  op;
306    int32_t opNum;           // for mod expressions, the right operand of the mod.
307    int32_t     value;       // valid for 'is' rules only.
308    UVector32   *rangeList;  // for 'in', 'within' rules. Null otherwise.
309    UBool   negated;           // TRUE for negated rules.
310    UBool   integerOnly;     // TRUE for 'within' rules.
311    tokenType digitsType;    // n | i | v | f constraint.
312    AndConstraint *next;
313
314    AndConstraint();
315    AndConstraint(const AndConstraint& other);
316    virtual ~AndConstraint();
317    AndConstraint* add();
318    // UBool isFulfilled(double number);
319    UBool isFulfilled(const IFixedDecimal &number);
320};
321
322class OrConstraint : public UMemory  {
323public:
324    AndConstraint *childNode;
325    OrConstraint *next;
326    OrConstraint();
327
328    OrConstraint(const OrConstraint& other);
329    virtual ~OrConstraint();
330    AndConstraint* add();
331    // UBool isFulfilled(double number);
332    UBool isFulfilled(const IFixedDecimal &number);
333};
334
335class RuleChain : public UMemory  {
336public:
337    UnicodeString   fKeyword;
338    RuleChain      *fNext;
339    OrConstraint   *ruleHeader;
340    UnicodeString   fDecimalSamples;  // Samples strings from rule source
341    UnicodeString   fIntegerSamples;  //   without @decimal or @integer, otherwise unprocessed.
342    UBool           fDecimalSamplesUnbounded;
343    UBool           fIntegerSamplesUnbounded;
344
345
346    RuleChain();
347    RuleChain(const RuleChain& other);
348    virtual ~RuleChain();
349
350    UnicodeString select(const IFixedDecimal &number) const;
351    void          dumpRules(UnicodeString& result);
352    UErrorCode    getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const;
353    UBool         isKeyword(const UnicodeString& keyword) const;
354};
355
356class PluralKeywordEnumeration : public StringEnumeration {
357public:
358    PluralKeywordEnumeration(RuleChain *header, UErrorCode& status);
359    virtual ~PluralKeywordEnumeration();
360    static UClassID U_EXPORT2 getStaticClassID(void);
361    virtual UClassID getDynamicClassID(void) const;
362    virtual const UnicodeString* snext(UErrorCode& status);
363    virtual void reset(UErrorCode& status);
364    virtual int32_t count(UErrorCode& status) const;
365private:
366    int32_t         pos;
367    UVector         fKeywordNames;
368};
369
370
371class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration {
372  public:
373    PluralAvailableLocalesEnumeration(UErrorCode &status);
374    virtual ~PluralAvailableLocalesEnumeration();
375    virtual const char* next(int32_t *resultLength, UErrorCode& status);
376    virtual void reset(UErrorCode& status);
377    virtual int32_t count(UErrorCode& status) const;
378  private:
379    UErrorCode      fOpenStatus;
380    UResourceBundle *fLocales;
381    UResourceBundle *fRes;
382};
383
384U_NAMESPACE_END
385
386#endif /* #if !UCONFIG_NO_FORMATTING */
387
#endif // PLURRULE_IMPL
389//eof
390