1/*
2******************************************************************************
3*
4*   Copyright (C) 1999-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  ubidiimp.h
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 1999aug06
14*   created by: Markus W. Scherer, updated by Matitiahu Allouche
15*/
16
17#ifndef UBIDIIMP_H
18#define UBIDIIMP_H
19
20/* set import/export definitions */
21#ifdef U_COMMON_IMPLEMENTATION
22
23#include "unicode/utypes.h"
24#include "unicode/uchar.h"
25#include "ubidi_props.h"
26
27/* miscellaneous definitions ---------------------------------------------- */
28
/* One BiDi class value for one character of text
 * (see the L, R, EN, ... constants in this header). */
typedef uint8_t DirProp;

/* Bit set of BiDi classes; the individual bits are built with DIRPROP_FLAG(). */
typedef uint32_t Flags;
31
32/*  Comparing the description of the BiDi algorithm with this implementation
33    is easier with the same names for the BiDi types in the code as there.
34    See UCharDirection in uchar.h .
35*/
36enum {
37    L=  U_LEFT_TO_RIGHT,
38    R=  U_RIGHT_TO_LEFT,
39    EN= U_EUROPEAN_NUMBER,
40    ES= U_EUROPEAN_NUMBER_SEPARATOR,
41    ET= U_EUROPEAN_NUMBER_TERMINATOR,
42    AN= U_ARABIC_NUMBER,
43    CS= U_COMMON_NUMBER_SEPARATOR,
44    B=  U_BLOCK_SEPARATOR,
45    S=  U_SEGMENT_SEPARATOR,
46    WS= U_WHITE_SPACE_NEUTRAL,
47    ON= U_OTHER_NEUTRAL,
48    LRE=U_LEFT_TO_RIGHT_EMBEDDING,
49    LRO=U_LEFT_TO_RIGHT_OVERRIDE,
50    AL= U_RIGHT_TO_LEFT_ARABIC,
51    RLE=U_RIGHT_TO_LEFT_EMBEDDING,
52    RLO=U_RIGHT_TO_LEFT_OVERRIDE,
53    PDF=U_POP_DIRECTIONAL_FORMAT,
54    NSM=U_DIR_NON_SPACING_MARK,
55    BN= U_BOUNDARY_NEUTRAL,
56    dirPropCount
57};
58
/*
 * Directionality properties are sometimes easier to handle as bit values.
 * DIRPROP_FLAG(dir) is the single bit that stands for property dir.
 * The abbreviations in the names of the macros built from it are the names
 * used in the BiDi algorithm.
 */
#define DIRPROP_FLAG(dir) (1UL << (dir))

/* special flag: multiple runs resulting from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL << 31)
69
/* are there any characters that are LTR or RTL? */
#define MASK_LTR \
    (DIRPROP_FLAG(L) | DIRPROP_FLAG(EN) | DIRPROP_FLAG(AN) | DIRPROP_FLAG(LRE) | DIRPROP_FLAG(LRO))
#define MASK_RTL \
    (DIRPROP_FLAG(R) | DIRPROP_FLAG(AL) | DIRPROP_FLAG(RLE) | DIRPROP_FLAG(RLO))
#define MASK_R_AL \
    (DIRPROP_FLAG(R) | DIRPROP_FLAG(AL))

/* explicit embedding codes */
#define MASK_LRX \
    (DIRPROP_FLAG(LRE) | DIRPROP_FLAG(LRO))
#define MASK_RLX \
    (DIRPROP_FLAG(RLE) | DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE \
    (DIRPROP_FLAG(LRO) | DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT \
    (MASK_LRX | MASK_RLX | DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT \
    (DIRPROP_FLAG(BN) | MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S \
    (DIRPROP_FLAG(B) | DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS \
    (MASK_B_S | DIRPROP_FLAG(WS) | MASK_BN_EXPLICIT)
#define MASK_N \
    (DIRPROP_FLAG(ON) | MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN \
    (DIRPROP_FLAG(ET) | DIRPROP_FLAG(NSM) | MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N \
    (DIRPROP_FLAG(CS) | DIRPROP_FLAG(ES) | DIRPROP_FLAG(ET) | MASK_N)

/*
 * Types that may be changed to "e", the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2).
 */
#define MASK_EMBEDDING \
    (DIRPROP_FLAG(NSM) | MASK_POSSIBLE_N)
102
/*
 * Map an embedding level to the matching direction: the low bit of the level,
 * as a DirProp. This works because the dirProp values L and R are defined as
 * 0 and 1 in UCharDirection.
 */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level) & 1))

/*
 * Nonzero for level values of 0xfe and above.
 * NOTE(review): presumably these are the UBIDI_DEFAULT_xxx pseudo-levels;
 * confirm against ubidi.h.
 */
#define IS_DEFAULT_LEVEL(level) ((level) >= 0xfe)
107
/*
 * CONTEXT_RTL is ORed to the property of characters in paragraphs
 * with contextual RTL direction when paraLevel is contextual.
 */
#define CONTEXT_RTL 0x80

/* the property value with the CONTEXT_RTL bit cleared */
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)

/*
 * A variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
 */
#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir)))

/*
 * Paragraph level that applies at text position index.
 * If defaultParaLevel is nonzero (contextual paragraph level), the result is
 * the CONTEXT_RTL bit (bit 7) of dirProps[index], shifted down to 0 or 1;
 * otherwise it is paraLevel.
 * The whole expansion is parenthesized so that the UBiDiLevel cast always
 * applies to the complete conditional, whatever expression surrounds the macro.
 * The ubidi argument is evaluated more than once.
 */
#define GET_PARALEVEL(ubidi, index) \
            ((UBiDiLevel)((ubidi)->defaultParaLevel ? (ubidi)->dirProps[index]>>7 \
                                                    : (ubidi)->paraLevel))
122
/* Paragraph type for multiple paragraph support ---------------------------- */

/* one entry of the paras arrays in UBiDi */
typedef int32_t Para;

/* code points of carriage return and line feed */
#define CR  0x000D
#define LF  0x000A
128
/* Run structure for reordering --------------------------------------------- */

/* bit flags for inserting LRM/RLM before/after a run (see Run.insertRemove) */
enum {
    LRM_BEFORE = 1 << 0,
    LRM_AFTER  = 1 << 1,
    RLM_BEFORE = 1 << 2,
    RLM_AFTER  = 1 << 3
};
136
/* one run of text, as used for reordering */
typedef struct Run {
    /* first character of the run; b31 indicates even/odd level */
    int32_t logicalStart;
    /* last visual position of the run +1 */
    int32_t visualLimit;
    /* if >0, flags for inserting LRM/RLM before/after run,
       if <0, count of bidi controls within run */
    int32_t insertRemove;
} Run;
143
/* in a Run, logicalStart will get this bit (bit 31) set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * Combine an index with the odd bit derived from a level.
 * Only the low bit of level is used. It is masked explicitly and shifted as
 * an unsigned value, because left-shifting a signed int32_t so that a set bit
 * reaches or passes the sign bit is undefined behavior in C.
 */
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|(int32_t)((uint32_t)((level)&1)<<31))
#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=(int32_t)((uint32_t)((level)&1)<<31))
#define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)

/*
 * Accessors for a value that carries the odd bit.
 * GET_INDEX yields the value with the odd bit cleared; the expression has the
 * unsigned long type of INDEX_ODD_BIT, so store it in an int32_t before using
 * it as an index.
 * GET_ODD_BIT yields 0 or 1; IS_ODD_RUN/IS_EVEN_RUN test the odd bit.
 */
#define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x)  ((UBool)(((x)&INDEX_ODD_BIT)!=0))
#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
155
156U_CFUNC UBool
157ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
158
/** BiDi control code points */
enum {
    ZWNJ_CHAR = 0x200c,
    ZWJ_CHAR  = 0x200d,
    LRM_CHAR  = 0x200e,
    RLM_CHAR  = 0x200f,
    LRE_CHAR  = 0x202a,
    RLE_CHAR  = 0x202b,
    PDF_CHAR  = 0x202c,
    LRO_CHAR  = 0x202d,
    RLO_CHAR  = 0x202e
};

/*
 * Nonzero if c is one of the code points above:
 * ZWNJ_CHAR..RLM_CHAR is tested by masking off the two low bits,
 * LRE_CHAR..RLO_CHAR with a single unsigned range check.
 * The argument is evaluated more than once.
 */
#define IS_BIDI_CONTROL_CHAR(c) \
    (((uint32_t)(c) & 0xfffffffc) == ZWNJ_CHAR || (uint32_t)((c) - LRE_CHAR) < 5)
173
/* InsertPoints structure for noting where to put BiDi marks ---------------- */

/* one place where a mark is to be put */
typedef struct Point {
    /* position in text */
    int32_t pos;
    /* flag for LRM/RLM, before/after */
    int32_t flag;
} Point;
180
181typedef struct InsertPoints {
182    int32_t capacity;       /* number of points allocated */
183    int32_t size;           /* number of points used */
184    int32_t confirmed;      /* number of points confirmed */
185    UErrorCode errorCode;   /* for eventual memory shortage */
186    Point *points;          /* pointer to array of points */
187} InsertPoints;
188
189
190/* UBiDi structure ----------------------------------------------------------- */
191
192struct UBiDi {
193    /* pointer to parent paragraph object (pointer to self if this object is
194     * a paragraph object); set to NULL in a newly opened object; set to a
195     * real value after a successful execution of ubidi_setPara or ubidi_setLine
196     */
197    const UBiDi * pParaBiDi;
198
199    const UBiDiProps *bdp;
200
201    /* alias pointer to the current text */
202    const UChar *text;
203
204    /* length of the current text */
205    int32_t originalLength;
206
207    /* if the UBIDI_OPTION_STREAMING option is set, this is the length
208     * of text actually processed by ubidi_setPara, which may be shorter than
209     * the original length.
210     * Otherwise, it is identical to the original length.
211     */
212    int32_t length;
213
214    /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
215     * marks are allowed to be inserted in one of the reordering mode, the
216     * length of the result string may be different from the processed length.
217     */
218    int32_t resultLength;
219
220    /* memory sizes in bytes */
221    int32_t dirPropsSize, levelsSize, parasSize, runsSize;
222
223    /* allocated memory */
224    DirProp *dirPropsMemory;
225    UBiDiLevel *levelsMemory;
226    Para *parasMemory;
227    Run *runsMemory;
228
229    /* indicators for whether memory may be allocated after ubidi_open() */
230    UBool mayAllocateText, mayAllocateRuns;
231
232    /* arrays with one value per text-character */
233    const DirProp *dirProps;
234    UBiDiLevel *levels;
235
236    /* are we performing an approximation of the "inverse BiDi" algorithm? */
237    UBool isInverse;
238
239    /* are we using the basic algorithm or its variation? */
240    UBiDiReorderingMode reorderingMode;
241
242    /* UBIDI_REORDER_xxx values must be ordered so that all the regular
243     * logical to visual modes come first, and all inverse BiDi modes
244     * come last.
245     */
246    #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL    UBIDI_REORDER_NUMBERS_SPECIAL
247
248    /* bitmask for reordering options */
249    uint32_t reorderingOptions;
250
251    /* must block separators receive level 0? */
252    UBool orderParagraphsLTR;
253
254    /* the paragraph level */
255    UBiDiLevel paraLevel;
256    /* original paraLevel when contextual */
257    /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
258    UBiDiLevel defaultParaLevel;
259
260    /* context data */
261    const UChar *prologue;
262    int32_t proLength;
263    const UChar *epilogue;
264    int32_t epiLength;
265
266    /* the following is set in ubidi_setPara, used in processPropertySeq */
267    const struct ImpTabPair * pImpTabPair;  /* pointer to levels state table pair */
268
269    /* the overall paragraph or line directionality - see UBiDiDirection */
270    UBiDiDirection direction;
271
272    /* flags is a bit set for which directional properties are in the text */
273    Flags flags;
274
275    /* lastArabicPos is index to the last AL in the text, -1 if none */
276    int32_t lastArabicPos;
277
278    /* characters after trailingWSStart are WS and are */
279    /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
280    int32_t trailingWSStart;
281
282    /* fields for paragraph handling */
283    int32_t paraCount;                  /* set in getDirProps() */
284    Para *paras;                        /* limits of paragraphs, filled in
285                            ResolveExplicitLevels() or CheckExplicitLevels() */
286
287    /* for single paragraph text, we only need a tiny array of paras (no malloc()) */
288    Para simpleParas[1];
289
290    /* fields for line reordering */
291    int32_t runCount;     /* ==-1: runs not set up yet */
292    Run *runs;
293
294    /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
295    Run simpleRuns[1];
296
297    /* for inverse Bidi with insertion of directional marks */
298    InsertPoints insertPoints;
299
300    /* for option UBIDI_OPTION_REMOVE_CONTROLS */
301    int32_t controlCount;
302
303    /* for Bidi class callback */
304    UBiDiClassCallback *fnClassCallback;    /* action pointer */
305    const void *coClassCallback;            /* context pointer */
306};
307
/* x is non-NULL and its pParaBiDi points to x itself (a paragraph object) */
#define IS_VALID_PARA(x) \
        ((x) && ((x)->pParaBiDi == (x)))

/*
 * x is non-NULL and either its pParaBiDi points to x itself, or its pParaBiDi
 * is non-NULL and points to an object whose pParaBiDi points to itself.
 * The argument is evaluated more than once.
 */
#define IS_VALID_PARA_OR_LINE(x) \
        ((x) && \
         ((x)->pParaBiDi == (x) || \
          (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi == (x)->pParaBiDi)))
310
311typedef union {
312    DirProp *dirPropsMemory;
313    UBiDiLevel *levelsMemory;
314    Para *parasMemory;
315    Run *runsMemory;
316} BidiMemoryForAllocation;
317
/*
 * Macros for initial checks at function entry.
 *
 * Each macro expands to an if statement that returns from the calling
 * function when its check fails; the RETURN_VOID_ variants are for functions
 * without a return value. Where an errcode argument is taken, it must be an
 * lvalue and is set before returning.
 * Every macro argument is parenthesized in the expansion so that arbitrary
 * expressions can be passed.
 * Because the expansion is a bare if statement, use these macros only as
 * complete statements, never as the unbraced body of an if that has an else.
 */

/* return retvalue if pErrcode is NULL or *pErrcode already indicates a failure */
#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue)   \
        if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return retvalue

/* set errcode to U_INVALID_STATE_ERROR and return retvalue unless IS_VALID_PARA(bidi) */
#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue)   \
        if(!IS_VALID_PARA(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return retvalue;                \
        }

/* set errcode to U_INVALID_STATE_ERROR and return retvalue unless IS_VALID_PARA_OR_LINE(bidi) */
#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue)   \
        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return retvalue;                \
        }

/* set errcode to U_ILLEGAL_ARGUMENT_ERROR and return retvalue unless start<=arg<limit */
#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue)   \
        if((arg)<(start) || (arg)>=(limit)) {       \
            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
            return retvalue;                        \
        }

/* the same checks for functions that return void */
#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode)   \
        if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return
#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode)   \
        if(!IS_VALID_PARA(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return;                \
        }
#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode)   \
        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return;                \
        }
#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode)   \
        if((arg)<(start) || (arg)>=(limit)) {       \
            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
            return;                        \
        }
354
355/* helper function to (re)allocate memory if allowed */
356U_CFUNC UBool
357ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
358
/*
 * Helper macros for each allocated array in UBiDi.
 * Each one passes the array pointer, its size field and the matching
 * mayAllocate... flag of pBiDi to ubidi_getMemory().
 * The size passed is length for the dirProps and levels arrays and
 * length*sizeof(element) for the Run and Para arrays.
 */
#define getDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, \
                        &(pBiDi)->dirPropsSize, \
                        (pBiDi)->mayAllocateText, \
                        (length))

#define getLevelsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, \
                        &(pBiDi)->levelsSize, \
                        (pBiDi)->mayAllocateText, \
                        (length))

#define getRunsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, \
                        &(pBiDi)->runsSize, \
                        (pBiDi)->mayAllocateRuns, \
                        (length)*sizeof(Run))

/* additional macros used by ubidi_open() - always allow allocation */
#define getInitialDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, \
                        &(pBiDi)->dirPropsSize, \
                        TRUE, \
                        (length))

#define getInitialLevelsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, \
                        &(pBiDi)->levelsSize, \
                        TRUE, \
                        (length))

#define getInitialParasMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, \
                        &(pBiDi)->parasSize, \
                        TRUE, \
                        (length)*sizeof(Para))

#define getInitialRunsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, \
                        &(pBiDi)->runsSize, \
                        TRUE, \
                        (length)*sizeof(Run))
388
389#endif
390
391#endif
392