1/*
2******************************************************************************
3*
4*   Copyright (C) 1999-2014, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  ubidiimp.h
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 1999aug06
14*   created by: Markus W. Scherer, updated by Matitiahu Allouche
15*/
16
17#ifndef UBIDIIMP_H
18#define UBIDIIMP_H
19
20/*  set import/export definitions */
21#ifdef U_COMMON_IMPLEMENTATION
22
23#include "unicode/utypes.h"
24#include "unicode/uchar.h"
25#include "ubidi_props.h"
26
27/* miscellaneous definitions ---------------------------------------------- */
28
29typedef uint8_t DirProp;
30typedef uint32_t Flags;
31
32/*  Comparing the description of the BiDi algorithm with this implementation
33    is easier with the same names for the BiDi types in the code as there.
34    See UCharDirection in uchar.h .
35*/
36enum {
37    L=  U_LEFT_TO_RIGHT,                /*  0 */
38    R=  U_RIGHT_TO_LEFT,                /*  1 */
39    EN= U_EUROPEAN_NUMBER,              /*  2 */
40    ES= U_EUROPEAN_NUMBER_SEPARATOR,    /*  3 */
41    ET= U_EUROPEAN_NUMBER_TERMINATOR,   /*  4 */
42    AN= U_ARABIC_NUMBER,                /*  5 */
43    CS= U_COMMON_NUMBER_SEPARATOR,      /*  6 */
44    B=  U_BLOCK_SEPARATOR,              /*  7 */
45    S=  U_SEGMENT_SEPARATOR,            /*  8 */
46    WS= U_WHITE_SPACE_NEUTRAL,          /*  9 */
47    ON= U_OTHER_NEUTRAL,                /* 10 */
48    LRE=U_LEFT_TO_RIGHT_EMBEDDING,      /* 11 */
49    LRO=U_LEFT_TO_RIGHT_OVERRIDE,       /* 12 */
50    AL= U_RIGHT_TO_LEFT_ARABIC,         /* 13 */
51    RLE=U_RIGHT_TO_LEFT_EMBEDDING,      /* 14 */
52    RLO=U_RIGHT_TO_LEFT_OVERRIDE,       /* 15 */
53    PDF=U_POP_DIRECTIONAL_FORMAT,       /* 16 */
54    NSM=U_DIR_NON_SPACING_MARK,         /* 17 */
55    BN= U_BOUNDARY_NEUTRAL,             /* 18 */
56    FSI=U_FIRST_STRONG_ISOLATE,         /* 19 */
57    LRI=U_LEFT_TO_RIGHT_ISOLATE,        /* 20 */
58    RLI=U_RIGHT_TO_LEFT_ISOLATE,        /* 21 */
59    PDI=U_POP_DIRECTIONAL_ISOLATE,      /* 22 */
60    ENL,    /* EN after W7 */           /* 23 */
61    ENR,    /* EN not subject to W7 */  /* 24 */
62    dirPropCount
63};
64
65/*  Sometimes, bit values are more appropriate
66    to deal with directionality properties.
67    Abbreviations in these macro names refer to names
68    used in the BiDi algorithm.
69*/
70#define DIRPROP_FLAG(dir) (1UL<<(dir))
71#define PURE_DIRPROP(prop)  ((prop)&~0xE0)    ?????????????????????????
72
73/* special flag for multiple runs from explicit embedding codes */
74#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
75
76/* are there any characters that are LTR or RTL? */
77#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(ENL)|DIRPROP_FLAG(ENR)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI))
78#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI))
79#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
80#define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN))
81
82/* explicit embedding codes */
83#define MASK_EXPLICIT (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(PDF))
84
85/* explicit isolate codes */
86#define MASK_ISO (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI))
87
88#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)
89
90/* paragraph and segment separators */
91#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
92
93/* all types that are counted as White Space or Neutral in some steps */
94#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO)
95
96/* types that are neutrals or could becomes neutrals in (Wn) */
97#define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS)
98
99/*
100 *  These types may be changed to "e",
101 *  the embedding type (L or R) of the run,
102 *  in the BiDi algorithm (N2)
103 */
104#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
105
106/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
107#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))
108
109#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
110
111/*
112 *  The following bit is used for the directional isolate status.
113 *  Stack entries corresponding to isolate sequences are greater than ISOLATE.
114 */
115#define ISOLATE  0x0100
116
117U_CFUNC UBiDiLevel
118ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t index);
119
120#define GET_PARALEVEL(ubidi, index) \
121            ((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? \
122                         (ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index))))
123
124/* number of paras entries allocated initially without malloc */
125#define SIMPLE_PARAS_SIZE   10
126/* number of isolate entries allocated initially without malloc */
127#define SIMPLE_ISOLATES_SIZE 5
128/* number of isolate run entries for paired brackets allocated initially without malloc */
129#define SIMPLE_OPENINGS_SIZE 20
130
131#define CR  0x000D
132#define LF  0x000A
133
134/* Run structure for reordering --------------------------------------------- */
135enum {
136    LRM_BEFORE=1,
137    LRM_AFTER=2,
138    RLM_BEFORE=4,
139    RLM_AFTER=8
140};
141
142typedef struct Para {
143    int32_t limit;
144    int32_t level;
145} Para;
146
147enum {                                  /* flags for Opening.flags */
148    FOUND_L=DIRPROP_FLAG(L),
149    FOUND_R=DIRPROP_FLAG(R)
150};
151
152typedef struct Opening {
153    int32_t position;                   /* position of opening bracket */
154    int32_t match;                      /* matching char or -position of closing bracket */
155    int32_t contextPos;                 /* position of last strong char found before opening */
156    uint16_t flags;                     /* bits for L or R/AL found within the pair */
157    UBiDiDirection contextDir;          /* L or R according to last strong char before opening */
158    uint8_t filler;                     /* to complete a nice multiple of 4 chars */
159} Opening;
160
161typedef struct IsoRun {
162    int32_t  contextPos;                /* position of char determining context */
163    uint16_t start;                     /* index of first opening entry for this run */
164    uint16_t limit;                     /* index after last opening entry for this run */
165    UBiDiLevel level;                   /* level of this run */
166    DirProp lastStrong;                 /* bidi class of last strong char found in this run */
167    DirProp lastBase;                   /* bidi class of last base char found in this run */
168    UBiDiDirection contextDir;          /* L or R to use as context for following openings */
169} IsoRun;
170
171typedef struct BracketData {
172    UBiDi   *pBiDi;
173    /* array of opening entries which should be enough in most cases; no malloc() */
174    Opening simpleOpenings[SIMPLE_OPENINGS_SIZE];
175    Opening *openings;                  /* pointer to current array of entries */
176    int32_t openingsCount;               /* number of allocated entries */
177    int32_t isoRunLast;                 /* index of last used entry */
178    /* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL
179       + 1 for index 0, + 1 for before the first isolated sequence */
180    IsoRun  isoRuns[UBIDI_MAX_EXPLICIT_LEVEL+2];
181    UBool isNumbersSpecial;             /* reordering mode for NUMBERS_SPECIAL */
182} BracketData;
183
184typedef struct Isolate {
185    int32_t startON;
186    int32_t start1;
187    int16_t stateImp;
188    int16_t state;
189} Isolate;
190
191typedef struct Run {
192    int32_t logicalStart,   /* first character of the run; b31 indicates even/odd level */
193            visualLimit,    /* last visual position of the run +1 */
194            insertRemove;   /* if >0, flags for inserting LRM/RLM before/after run,
195                               if <0, count of bidi controls within run            */
196} Run;
197
198/* in a Run, logicalStart will get this bit set if the run level is odd */
199#define INDEX_ODD_BIT (1UL<<31)
200
201#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31))
202#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=((int32_t)(level)<<31))
203#define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)
204
205#define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
206#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
207#define IS_ODD_RUN(x)  ((UBool)(((x)&INDEX_ODD_BIT)!=0))
208#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
209
210U_CFUNC UBool
211ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
212
213/** BiDi control code points */
214enum {
215    ZWNJ_CHAR=0x200c,
216    ZWJ_CHAR,
217    LRM_CHAR,
218    RLM_CHAR,
219    LRE_CHAR=0x202a,
220    RLE_CHAR,
221    PDF_CHAR,
222    LRO_CHAR,
223    RLO_CHAR,
224    LRI_CHAR=0x2066,
225    RLI_CHAR,
226    FSI_CHAR,
227    PDI_CHAR
228};
229
230#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5 || (uint32_t)((c)-LRI_CHAR)<4)
231
232/* InsertPoints structure for noting where to put BiDi marks ---------------- */
233
234typedef struct Point {
235    int32_t pos;            /* position in text */
236    int32_t flag;           /* flag for LRM/RLM, before/after */
237} Point;
238
239typedef struct InsertPoints {
240    int32_t capacity;       /* number of points allocated */
241    int32_t size;           /* number of points used */
242    int32_t confirmed;      /* number of points confirmed */
243    UErrorCode errorCode;   /* for eventual memory shortage */
244    Point *points;          /* pointer to array of points */
245} InsertPoints;
246
247
248/* UBiDi structure ----------------------------------------------------------- */
249
250struct UBiDi {
251    /* pointer to parent paragraph object (pointer to self if this object is
252     * a paragraph object); set to NULL in a newly opened object; set to a
253     * real value after a successful execution of ubidi_setPara or ubidi_setLine
254     */
255    const UBiDi * pParaBiDi;
256
257    const UBiDiProps *bdp;
258
259    /* alias pointer to the current text */
260    const UChar *text;
261
262    /* length of the current text */
263    int32_t originalLength;
264
265    /* if the UBIDI_OPTION_STREAMING option is set, this is the length
266     * of text actually processed by ubidi_setPara, which may be shorter than
267     * the original length.
268     * Otherwise, it is identical to the original length.
269     */
270    int32_t length;
271
272    /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
273     * marks are allowed to be inserted in one of the reordering mode, the
274     * length of the result string may be different from the processed length.
275     */
276    int32_t resultLength;
277
278    /* memory sizes in bytes */
279    int32_t dirPropsSize, levelsSize, openingsSize, parasSize, runsSize, isolatesSize;
280
281    /* allocated memory */
282    DirProp *dirPropsMemory;
283    UBiDiLevel *levelsMemory;
284    Opening *openingsMemory;
285    Para *parasMemory;
286    Run *runsMemory;
287    Isolate *isolatesMemory;
288
289    /* indicators for whether memory may be allocated after ubidi_open() */
290    UBool mayAllocateText, mayAllocateRuns;
291
292    /* arrays with one value per text-character */
293    DirProp *dirProps;
294    UBiDiLevel *levels;
295
296    /* are we performing an approximation of the "inverse BiDi" algorithm? */
297    UBool isInverse;
298
299    /* are we using the basic algorithm or its variation? */
300    UBiDiReorderingMode reorderingMode;
301
302    /* UBIDI_REORDER_xxx values must be ordered so that all the regular
303     * logical to visual modes come first, and all inverse BiDi modes
304     * come last.
305     */
306    #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL    UBIDI_REORDER_NUMBERS_SPECIAL
307
308    /* bitmask for reordering options */
309    uint32_t reorderingOptions;
310
311    /* must block separators receive level 0? */
312    UBool orderParagraphsLTR;
313
314    /* the paragraph level */
315    UBiDiLevel paraLevel;
316    /* original paraLevel when contextual */
317    /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
318    UBiDiLevel defaultParaLevel;
319
320    /* context data */
321    const UChar *prologue;
322    int32_t proLength;
323    const UChar *epilogue;
324    int32_t epiLength;
325
326    /* the following is set in ubidi_setPara, used in processPropertySeq */
327    const struct ImpTabPair * pImpTabPair;  /* pointer to levels state table pair */
328
329    /* the overall paragraph or line directionality - see UBiDiDirection */
330    UBiDiDirection direction;
331
332    /* flags is a bit set for which directional properties are in the text */
333    Flags flags;
334
335    /* lastArabicPos is index to the last AL in the text, -1 if none */
336    int32_t lastArabicPos;
337
338    /* characters after trailingWSStart are WS and are */
339    /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
340    int32_t trailingWSStart;
341
342    /* fields for paragraph handling */
343    int32_t paraCount;                  /* set in getDirProps() */
344    /* filled in getDirProps() */
345    Para *paras;
346
347    /* for relatively short text, we only need a tiny array of paras (no malloc()) */
348    Para simpleParas[SIMPLE_PARAS_SIZE];
349
350    /* fields for line reordering */
351    int32_t runCount;     /* ==-1: runs not set up yet */
352    Run *runs;
353
354    /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
355    Run simpleRuns[1];
356
357    /* maximum or current nesting depth of isolate sequences */
358    /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
359       nesting encountered.
360       Within resolveImplicitLevels(), this is the index of the current isolates
361       stack entry. */
362    int32_t isolateCount;
363    Isolate *isolates;
364
365    /* for simple text, have a small stack (no malloc()) */
366    Isolate simpleIsolates[SIMPLE_ISOLATES_SIZE];
367
368    /* for inverse Bidi with insertion of directional marks */
369    InsertPoints insertPoints;
370
371    /* for option UBIDI_OPTION_REMOVE_CONTROLS */
372    int32_t controlCount;
373
374    /* for Bidi class callback */
375    UBiDiClassCallback *fnClassCallback;    /* action pointer */
376    const void *coClassCallback;            /* context pointer */
377};
378
379#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
380#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
381
382typedef union {
383    DirProp *dirPropsMemory;
384    UBiDiLevel *levelsMemory;
385    Opening *openingsMemory;
386    Para *parasMemory;
387    Run *runsMemory;
388    Isolate *isolatesMemory;
389} BidiMemoryForAllocation;
390
391/* Macros for initial checks at function entry */
392#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue)   \
393        if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue
394#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue)   \
395        if(!IS_VALID_PARA(bidi)) {  \
396            errcode=U_INVALID_STATE_ERROR;  \
397            return retvalue;                \
398        }
399#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue)   \
400        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
401            errcode=U_INVALID_STATE_ERROR;  \
402            return retvalue;                \
403        }
404#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue)   \
405        if((arg)<(start) || (arg)>=(limit)) {       \
406            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
407            return retvalue;                        \
408        }
409
410#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode)   \
411        if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return
412#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode)   \
413        if(!IS_VALID_PARA(bidi)) {  \
414            errcode=U_INVALID_STATE_ERROR;  \
415            return;                \
416        }
417#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode)   \
418        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
419            errcode=U_INVALID_STATE_ERROR;  \
420            return;                \
421        }
422#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode)   \
423        if((arg)<(start) || (arg)>=(limit)) {       \
424            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
425            return;                        \
426        }
427
428/* helper function to (re)allocate memory if allowed */
429U_CFUNC UBool
430ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
431
432/* helper macros for each allocated array in UBiDi */
433#define getDirPropsMemory(pBiDi, length) \
434        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
435                        (pBiDi)->mayAllocateText, (length))
436
437#define getLevelsMemory(pBiDi, length) \
438        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
439                        (pBiDi)->mayAllocateText, (length))
440
441#define getRunsMemory(pBiDi, length) \
442        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
443                        (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))
444
445/* additional macros used by ubidi_open() - always allow allocation */
446#define getInitialDirPropsMemory(pBiDi, length) \
447        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
448                        TRUE, (length))
449
450#define getInitialLevelsMemory(pBiDi, length) \
451        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
452                        TRUE, (length))
453
454#define getInitialOpeningsMemory(pBiDi, length) \
455        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \
456                        TRUE, (length)*sizeof(Opening))
457
458#define getInitialParasMemory(pBiDi, length) \
459        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
460                        TRUE, (length)*sizeof(Para))
461
462#define getInitialRunsMemory(pBiDi, length) \
463        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
464                        TRUE, (length)*sizeof(Run))
465
466#define getInitialIsolatesMemory(pBiDi, length) \
467        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \
468                        TRUE, (length)*sizeof(Isolate))
469
470#endif
471
472#endif
473