1/*
2*******************************************************************************
3*
4*   Copyright (C) 2001-2010, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  ucol_tok.h
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created 02/22/2001
14*   created by: Vladimir Weinstein
15*
16* This module reads a tailoring rule string and produces a list of
17* tokens that will be turned into collation elements
18*
19*/
20
21#ifndef UCOL_TOKENS_H
22#define UCOL_TOKENS_H
23
24#include "unicode/utypes.h"
25#include "unicode/uset.h"
26
27#if !UCONFIG_NO_COLLATION
28
29#include "ucol_imp.h"
30#include "uhash.h"
31#include "unicode/parseerr.h"
32
/*
 * Sentinel 32-bit values.
 * NOTE(review): the places where they are stored/compared are not visible in
 * this header -- confirm against the tokenizer implementation.
 */
#define UCOL_TOK_UNSET 0xFFFFFFFF
#define UCOL_TOK_RESET 0xDEADBEEF

/* The two polarity values; presumably what UColToken.polarity holds -- TODO confirm. */
#define UCOL_TOK_POLARITY_NEGATIVE 0
#define UCOL_TOK_POLARITY_POSITIVE 1

/*
 * Flag values. UCOL_TOK_TOP, UCOL_TOK_VARIABLE_TOP and UCOL_TOK_SUCCESS are
 * single bits; UCOL_TOK_BEFORE (0x03) spans the two lowest bits.
 * Presumably these are combined in the 16-bit `flags` members of UColToken
 * and UColParsedToken -- TODO confirm.
 */
#define UCOL_TOK_TOP 0x04
#define UCOL_TOK_VARIABLE_TOP 0x08
#define UCOL_TOK_BEFORE 0x03
#define UCOL_TOK_SUCCESS 0x10

/* this is space for the extra strings that need to be unquoted */
/* during the parsing of the rules */
/* NOTE(review): the unit (UChars vs. bytes) is not stated here -- confirm at the allocation site. */
#define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096
/* Forward declaration: UColTokListHeader and UColToken hold pointers to each other. */
typedef struct UColToken UColToken;

/*
 * Header of one token list. UColToken.listHeader points back at a header of
 * this type, and UColTokenParser.lh points at UColTokListHeader storage.
 * NOTE(review): the field semantics below are inferred from the names only;
 * confirm against the code that fills this structure.
 */
typedef struct  {
  UColToken* first;   /* presumably the first token of the list -- TODO confirm */
  UColToken* last;    /* presumably the last token of the list -- TODO confirm */
  UColToken* reset;   /* presumably the token this list was reset to -- TODO confirm */
  UBool indirect;
  /* Three pairs of 32-bit values: base, next and previous, each with a "Cont" companion. */
  uint32_t baseCE;
  uint32_t baseContCE;
  uint32_t nextCE;
  uint32_t nextContCE;
  uint32_t previousCE;
  uint32_t previousContCE;
  /* Note: pos is sized by UCOL_STRENGTH_LIMIT, the arrays below by UCOL_CE_STRENGTH_LIMIT. */
  int32_t pos[UCOL_STRENGTH_LIMIT];
  /* Three entries per CE strength level in each of the two gap arrays. */
  uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT];
  uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT];
  /* One entry per CE strength level. */
  uint32_t numStr[UCOL_CE_STRENGTH_LIMIT];
  UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT];   /* presumably first token of each strength -- TODO confirm */
  UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT];   /* presumably last token of each strength -- TODO confirm */
} UColTokListHeader;
67
/*
 * One token produced from the tailoring rules. Tokens are linked to their
 * neighbours through `previous`/`next` and refer to their owning list through
 * `listHeader`.
 */
struct UColToken {
  /* Single UChar values; judging by the names they exist for debugging only -- TODO confirm. */
  UChar debugSource;
  UChar debugExpansion;
  UChar debugPrefix;
  uint32_t CEs[128];
  uint32_t noOfCEs;      /* presumably the number of used entries in CEs -- TODO confirm */
  uint32_t expCEs[128];
  uint32_t noOfExpCEs;   /* presumably the number of used entries in expCEs -- TODO confirm */
  /* NOTE(review): these three are 32-bit integers, not pointers; their encoding is not visible in this header. */
  uint32_t source;
  uint32_t expansion;
  uint32_t prefix;
  uint32_t strength;
  uint32_t toInsert;
  /*
   * 1 for <, <<, <<<, , ;
   * For >, >>, >>> the earlier note said -1, which this unsigned field cannot
   * hold as such; presumably UCOL_TOK_POLARITY_NEGATIVE (0) is what is
   * stored -- TODO confirm.
   */
  uint32_t polarity;
  UColTokListHeader *listHeader;
  UColToken* previous;
  UColToken* next;
  UChar **rulesToParseHdl;   /* pointer to a UChar pointer; presumably a handle to the rule text so it can be relocated -- TODO confirm */
  uint16_t flags;            /* presumably a combination of the UCOL_TOK_* flag values -- TODO confirm */
};
88
/*
 * A token that has been parsed but not yet processed.
 * It exists to reduce the number of arguments passed around in the parser;
 * an instance is embedded in UColTokenParser as `parsedToken`.
 *
 * NOTE(review): the offset/length pairs presumably index into the rule text
 * held by the parser -- confirm the base pointer and the unit at the use site.
 */
typedef struct {
  uint32_t strength;
  uint32_t charsOffset;
  uint32_t charsLen;
  uint32_t extensionOffset;
  uint32_t extensionLen;
  uint32_t prefixOffset;
  uint32_t prefixLen;
  uint16_t flags;           /* presumably a combination of the UCOL_TOK_* flag values -- TODO confirm */
  uint16_t indirectIndex;
} UColParsedToken;
105
106
/*
 * State of the rule tokenizer. A pointer to this structure is the `src`
 * argument of the ucol_tok_* functions declared in this header.
 * NOTE(review): ownership of the pointer members is not visible here;
 * see ucol_tok_initTokenList / ucol_tok_closeTokenList in the implementation.
 */
typedef struct {
  UColParsedToken parsedToken;   /* the most recently parsed, not yet processed token */
  /* Pointers into UChar buffers; presumably the rule text and its extra space -- TODO confirm. */
  UChar *source;
  UChar *end;
  const UChar *current;
  UChar *sourceCurrent;
  UChar *extraCurrent;
  UChar *extraEnd;
  const InverseUCATableHeader *invUCA;
  const UCollator *UCA;
  UHashtable *tailored;
  UColOptionSet *opts;
  uint32_t resultLen;
  uint32_t listCapacity;
  UColTokListHeader *lh;
  UColToken *varTop;
  USet *copySet;
  USet *removeSet;
  UBool buildCCTabFlag;  /* Tailoring rule requires building combining class table. */

  UChar32 previousCp;               /* Previous code point. */
  /* For processing starred lists. */
  UBool isStarred;                   /* Are we processing a starred token? */
  UBool savedIsStarred;
  uint32_t currentStarredCharIndex;  /* Index of the current character in the starred expression. */
  uint32_t lastStarredCharIndex;    /* Index to the last character in the starred expression. */

  /* For processing ranges. */
  UBool inRange;                     /* Are we in a range? */
  UChar32 currentRangeCp;           /* Current code point in the range. */
  UChar32 lastRangeCp;              /* The last code point in the range. */

  /* reorder codes for collation reordering */
  int32_t* reorderCodes;
  int32_t reorderCodesLength;       /* presumably the number of entries in reorderCodes -- TODO confirm */

} UColTokenParser;
144
/*
 * One sub-option: a name given as a UChar string plus the attribute value
 * associated with it. ucolTokOption.subopts points at entries of this type.
 */
typedef struct {
  const UChar *subName;         /* sub-option name */
  int32_t subLen;               /* presumably the length of subName in UChars -- TODO confirm */
  UColAttributeValue attrVal;   /* attribute value associated with this name */
} ucolTokSuboption;
150
/*
 * One option: a name given as a UChar string, the sub-options it refers to,
 * and the collation attribute associated with it.
 */
typedef struct {
   const UChar *optionName;            /* option name */
   int32_t optionLen;                  /* presumably the length of optionName in UChars -- TODO confirm */
   const ucolTokSuboption *subopts;    /* sub-option entries for this option */
   int32_t subSize;                    /* presumably the number of entries in subopts -- TODO confirm */
   UColAttribute attr;                 /* attribute associated with this option */
} ucolTokOption;
158
/*
 * Evaluates to 1 when ch is one of the following ASCII code points,
 * and to 0 otherwise:
 *   U+0020..U+002F, U+003A..U+003F, U+005B..U+0060, U+007B, U+007D..U+007E.
 * U+007C is not covered by any of the tests.
 *
 * The argument is expanded several times, so pass an expression without
 * side effects.
 */
#define ucol_tok_isSpecialChar(ch)      \
    (((ch) <= 0x2F && (ch) >= 0x20) ||  \
     ((ch) <= 0x3F && (ch) >= 0x3A) ||  \
     ((ch) <= 0x60 && (ch) >= 0x5B) ||  \
     ((ch) <= 0x7E && (ch) >= 0x7D) ||  \
     ((ch) == 0x7B))
165
166
/**
 * Runs the tokenizer over the rules held in `src`; per the file header this
 * module turns a tailoring rule string into a list of tokens.
 * NOTE(review): the implementation is not in this header. The meaning of the
 * return value (presumably a count of resulting lists, cf.
 * UColTokenParser.resultLen) must be confirmed there.
 *
 * @param src        parser state; passed non-const
 * @param parseError parse error structure; presumably filled in when the rules are malformed -- TODO confirm
 * @param status     ICU error code (in/out)
 * @return a uint32_t; see the note above
 */
U_CFUNC
uint32_t ucol_tok_assembleTokenList(UColTokenParser *src,
                                    UParseError *parseError,
                                    UErrorCode *status);
171
/**
 * Initializes the parser state `src` for a given rule string.
 * NOTE(review): the implementation is not in this header; whether `rules` is
 * copied or referenced, and what `src` owns afterwards, must be confirmed
 * there. ucol_tok_closeTokenList is the matching close function by name.
 *
 * @param src         parser state to initialize
 * @param rules       rule text
 * @param rulesLength length of `rules`; presumably counted in UChars -- TODO confirm
 * @param UCA         collator passed as const; presumably the root/UCA collator -- TODO confirm
 * @param importFunc  callback of type GetCollationRulesFunction;
 *                    ucol_tok_getRulesFromBundle, declared in this header, is
 *                    presumably one such function -- TODO confirm
 * @param context     opaque pointer; presumably handed to importFunc -- TODO confirm
 * @param status      ICU error code (in/out)
 */
U_CFUNC
void ucol_tok_initTokenList(UColTokenParser *src,
                            const UChar *rules,
                            const uint32_t rulesLength,
                            const UCollator *UCA,
                            GetCollationRulesFunction importFunc,
                            void* context,
                            UErrorCode *status);
180
/**
 * Counterpart, by name, of ucol_tok_initTokenList.
 * NOTE(review): presumably releases what the parser state owns; which members
 * are freed is not visible in this header -- confirm in the implementation.
 *
 * @param src parser state to close
 */
U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src);
182
/**
 * Parses the next token from the rules held in `src`.
 * NOTE(review): the implementation is not in this header. Presumably the
 * result is left in src->parsedToken and the returned pointer marks a
 * position in the rule text (with NULL at the end or on error) -- confirm.
 *
 * @param src          parser state; passed non-const
 * @param startOfRules flag supplied by the caller; presumably TRUE for the first token -- TODO confirm
 * @param parseError   parse error structure; presumably filled in on malformed input -- TODO confirm
 * @param status       ICU error code (in/out)
 * @return pointer to const UChar; see the note above
 */
U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src,
                        UBool startOfRules,
                        UParseError *parseError,
                        UErrorCode *status);
187
188
/**
 * Reads one argument from the UChar range [start, end) and reports it as an
 * attribute/value pair through the two output pointers.
 * NOTE(review): the implementation is not in this header; whether `end` is
 * exclusive and what the returned pointer designates (presumably the position
 * after the consumed argument) must be confirmed there.
 *
 * @param start  start of the text to examine
 * @param end    end of the text to examine
 * @param attrib output: attribute
 * @param value  output: attribute value
 * @param status ICU error code (in/out)
 * @return pointer to const UChar; see the note above
 */
U_CAPI const UChar * U_EXPORT2
ucol_tok_getNextArgument(const UChar *start, const UChar *end,
                               UColAttribute *attrib, UColAttributeValue *value,
                               UErrorCode *status);
/**
 * Looks up the "next" CE / continuation pair relative to (CE, contCE) and
 * stores it through nextCE / nextContCE.
 * NOTE(review): the implementation is not in this header. Presumably the
 * lookup uses src->invUCA and the return value is a table position (negative
 * on failure) -- confirm.
 *
 * @param src        parser state; read-only here (const)
 * @param CE         input 32-bit value
 * @param contCE     input companion 32-bit value
 * @param nextCE     output
 * @param nextContCE output
 * @param strength   strength to use for the lookup
 * @return an int32_t; see the note above
 */
U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
                                            uint32_t CE, uint32_t contCE,
                                            uint32_t *nextCE, uint32_t *nextContCE,
                                            uint32_t strength);
/**
 * Looks up the "previous" CE / continuation pair relative to (CE, contCE) and
 * stores it through prevCE / prevContCE.
 * NOTE(review): the implementation is not in this header. Presumably the
 * lookup uses src->invUCA and the return value is a table position (negative
 * on failure) -- confirm.
 * NOTE(review): this is declared U_CFUNC while its sibling ucol_inv_getNextCE
 * is U_CAPI; both carry U_EXPORT2 -- confirm the difference is intentional.
 *
 * @param src        parser state; read-only here (const)
 * @param CE         input 32-bit value
 * @param contCE     input companion 32-bit value
 * @param prevCE     output
 * @param prevContCE output
 * @param strength   strength to use for the lookup
 * @return an int32_t; see the note above
 */
U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
                                            uint32_t CE, uint32_t contCE,
                                            uint32_t *prevCE, uint32_t *prevContCE,
                                            uint32_t strength);
201
/**
 * Returns collation rule text for a locale/type pair.
 * NOTE(review): the implementation is not in this header. Presumably this is
 * usable as the GetCollationRulesFunction passed to ucol_tok_initTokenList and
 * reads the rules from a resource bundle; ownership of the returned text is
 * not visible here -- confirm.
 *
 * @param context opaque pointer supplied by the caller
 * @param locale  locale identifier (char string)
 * @param type    collation type (char string)
 * @param pLength output; presumably the length of the returned text in UChars -- TODO confirm
 * @param status  ICU error code (in/out)
 * @return pointer to const UChar rule text; see the note above
 */
U_CFUNC const UChar* ucol_tok_getRulesFromBundle(
    void* context,
    const char* locale,
    const char* type,
    int32_t* pLength,
    UErrorCode* status);
208
209#endif /* #if !UCONFIG_NO_COLLATION */
210
211#endif
212