1/*
2******************************************************************************
3*
4*   Copyright (C) 1999-2013, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  ubidiimp.h
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 1999aug06
14*   created by: Markus W. Scherer, updated by Matitiahu Allouche
15*/
16
17#ifndef UBIDIIMP_H
18#define UBIDIIMP_H
19
20/* set import/export definitions */
21#ifdef U_COMMON_IMPLEMENTATION
22
23#include "unicode/utypes.h"
24#include "unicode/uchar.h"
25#include "ubidi_props.h"
26
27/* miscellaneous definitions ---------------------------------------------- */
28
29typedef uint8_t DirProp;
30typedef uint32_t Flags;
31
32/*  Comparing the description of the BiDi algorithm with this implementation
33    is easier with the same names for the BiDi types in the code as there.
34    See UCharDirection in uchar.h .
35*/
36enum {
37    L=  U_LEFT_TO_RIGHT,                /*  0 */
38    R=  U_RIGHT_TO_LEFT,                /*  1 */
39    EN= U_EUROPEAN_NUMBER,              /*  2 */
40    ES= U_EUROPEAN_NUMBER_SEPARATOR,    /*  3 */
41    ET= U_EUROPEAN_NUMBER_TERMINATOR,   /*  4 */
42    AN= U_ARABIC_NUMBER,                /*  5 */
43    CS= U_COMMON_NUMBER_SEPARATOR,      /*  6 */
44    B=  U_BLOCK_SEPARATOR,              /*  7 */
45    S=  U_SEGMENT_SEPARATOR,            /*  8 */
46    WS= U_WHITE_SPACE_NEUTRAL,          /*  9 */
47    ON= U_OTHER_NEUTRAL,                /* 10 */
48    LRE=U_LEFT_TO_RIGHT_EMBEDDING,      /* 11 */
49    LRO=U_LEFT_TO_RIGHT_OVERRIDE,       /* 12 */
50    AL= U_RIGHT_TO_LEFT_ARABIC,         /* 13 */
51    RLE=U_RIGHT_TO_LEFT_EMBEDDING,      /* 14 */
52    RLO=U_RIGHT_TO_LEFT_OVERRIDE,       /* 15 */
53    PDF=U_POP_DIRECTIONAL_FORMAT,       /* 16 */
54    NSM=U_DIR_NON_SPACING_MARK,         /* 17 */
55    BN= U_BOUNDARY_NEUTRAL,             /* 18 */
56    FSI=U_FIRST_STRONG_ISOLATE,         /* 19 */
57    LRI=U_LEFT_TO_RIGHT_ISOLATE,        /* 20 */
58    RLI=U_RIGHT_TO_LEFT_ISOLATE,        /* 21 */
59    PDI=U_POP_DIRECTIONAL_ISOLATE,      /* 22 */
60    ENL,                                /* 23 */
61    ENR,                                /* 24 */
62    dirPropCount
63};
64
65/*
66 * Sometimes, bit values are more appropriate
67 * to deal with directionality properties.
68 * Abbreviations in these macro names refer to names
69 * used in the BiDi algorithm.
70 */
71#define DIRPROP_FLAG(dir) (1UL<<(dir))
72
73/* special flag for multiple runs from explicit embedding codes */
74#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
75
76/* are there any characters that are LTR or RTL? */
77#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI))
78#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI))
79#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
80#define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN))
81
82/* explicit embedding codes */
83#define MASK_EXPLICIT (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(PDF))
84
85/* explicit isolate codes */
86#define MASK_ISO (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI))
87
88#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)
89
90/* paragraph and segment separators */
91#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
92
93/* all types that are counted as White Space or Neutral in some steps */
94#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO)
95
96/* types that are neutrals or could becomes neutrals in (Wn) */
97#define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS)
98
99/*
100 * These types may be changed to "e",
101 * the embedding type (L or R) of the run,
102 * in the BiDi algorithm (N2)
103 */
104#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
105
106/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
107#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))
108
109#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
110
111/*
112 * The following bit is ORed to the property of directional control
113 * characters which are ignored: unmatched PDF or PDI; LRx, RLx or FSI
114 * which would exceed the maximum explicit bidi level.
115 */
116#define IGNORE_CC   0x40
117
118#define PURE_DIRPROP(prop)  ((prop)&~IGNORE_CC)
119
120/*
121 * The following bit is used for the directional isolate status.
122 * Stack entries corresponding to isolate sequences are greater than ISOLATE.
123 */
124#define ISOLATE  0x0100
125
126U_CFUNC UBiDiLevel
127ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t index);
128
129#define GET_PARALEVEL(ubidi, index) \
130            ((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? \
131                         (ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index))))
132
133/* number of paras entries allocated initially without malloc */
134#define SIMPLE_PARAS_SIZE   10
135/* number of isolate entries allocated initially without malloc */
136#define SIMPLE_ISOLATES_SIZE 5
137/* number of isolate run entries for paired brackets allocated initially without malloc */
138#define SIMPLE_OPENINGS_SIZE 20
139
140#define CR  0x000D
141#define LF  0x000A
142
143/* Run structure for reordering --------------------------------------------- */
144enum {
145    LRM_BEFORE=1,
146    LRM_AFTER=2,
147    RLM_BEFORE=4,
148    RLM_AFTER=8
149};
150
151typedef struct Para {
152    int32_t limit;
153    int32_t level;
154} Para;
155
156enum {                                  /* flags for Opening.flags */
157    FOUND_L=DIRPROP_FLAG(L),
158    FOUND_R=DIRPROP_FLAG(R)
159};
160
161typedef struct Opening {
162    int32_t position;                   /* position of opening bracket */
163    int32_t match;                      /* matching char or -position of closing bracket */
164    int32_t contextPos;                 /* position of last strong char found before opening */
165    uint16_t flags;                     /* bits for L or R/AL found within the pair */
166    UBiDiDirection contextDir;          /* L or R according to last strong char before opening */
167    uint8_t filler;                     /* to complete a nice multiple of 4 chars */
168} Opening;
169
170typedef struct IsoRun {
171    int32_t  lastStrongPos;             /* position of last strong char found in this run */
172    int32_t  contextPos;                /* position of last char defining context */
173    uint16_t start;                     /* index of first opening entry for this run */
174    uint16_t limit;                     /* index after last opening entry for this run */
175    UBiDiLevel level;                   /* level of this run */
176    DirProp lastStrong;                 /* bidi class of last strong char found in this run */
177    UBiDiDirection contextDir;          /* L or R to use as context for following openings */
178    uint8_t filler;                     /* to complete a nice multiple of 4 chars */
179} IsoRun;
180
181typedef struct BracketData {
182    UBiDi   *pBiDi;
183    /* array of opening entries which should be enough in most cases; no malloc() */
184    Opening simpleOpenings[SIMPLE_OPENINGS_SIZE];
185    Opening *openings;                  /* pointer to current array of entries */
186    int32_t openingsCount;              /* number of allocated entries */
187    int32_t isoRunLast;                 /* index of last used entry */
188    /* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL
189       + 1 for index 0, + 1 for before the first isolated sequence */
190    IsoRun  isoRuns[UBIDI_MAX_EXPLICIT_LEVEL+2];
191    UBool isNumbersSpecial;             /* reordering mode for NUMBERS_SPECIAL */
192} BracketData;
193
194typedef struct Isolate {
195    int32_t start1;
196    int16_t stateImp;
197    int16_t state;
198} Isolate;
199
200typedef struct Run {
201    int32_t logicalStart,   /* first character of the run; b31 indicates even/odd level */
202            visualLimit,    /* last visual position of the run +1 */
203            insertRemove;   /* if >0, flags for inserting LRM/RLM before/after run,
204                               if <0, count of bidi controls within run            */
205} Run;
206
207/* in a Run, logicalStart will get this bit set if the run level is odd */
208#define INDEX_ODD_BIT (1UL<<31)
209
210#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31))
211#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=((int32_t)(level)<<31))
212#define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)
213
214#define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
215#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
216#define IS_ODD_RUN(x)  ((UBool)(((x)&INDEX_ODD_BIT)!=0))
217#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
218
219U_CFUNC UBool
220ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
221
222/** BiDi control code points */
223enum {
224    ZWNJ_CHAR=0x200c,
225    ZWJ_CHAR,
226    LRM_CHAR,
227    RLM_CHAR,
228    LRE_CHAR=0x202a,
229    RLE_CHAR,
230    PDF_CHAR,
231    LRO_CHAR,
232    RLO_CHAR,
233    LRI_CHAR=0x2066,
234    RLI_CHAR,
235    FSI_CHAR,
236    PDI_CHAR
237};
238
239#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5 || (uint32_t)((c)-LRI_CHAR)<4)
240
241/* InsertPoints structure for noting where to put BiDi marks ---------------- */
242
243typedef struct Point {
244    int32_t pos;            /* position in text */
245    int32_t flag;           /* flag for LRM/RLM, before/after */
246} Point;
247
248typedef struct InsertPoints {
249    int32_t capacity;       /* number of points allocated */
250    int32_t size;           /* number of points used */
251    int32_t confirmed;      /* number of points confirmed */
252    UErrorCode errorCode;   /* for eventual memory shortage */
253    Point *points;          /* pointer to array of points */
254} InsertPoints;
255
256
257/* UBiDi structure ----------------------------------------------------------- */
258
259struct UBiDi {
260    /* pointer to parent paragraph object (pointer to self if this object is
261     * a paragraph object); set to NULL in a newly opened object; set to a
262     * real value after a successful execution of ubidi_setPara or ubidi_setLine
263     */
264    const UBiDi * pParaBiDi;
265
266    const UBiDiProps *bdp;
267
268    /* alias pointer to the current text */
269    const UChar *text;
270
271    /* length of the current text */
272    int32_t originalLength;
273
274    /* if the UBIDI_OPTION_STREAMING option is set, this is the length
275     * of text actually processed by ubidi_setPara, which may be shorter than
276     * the original length.
277     * Otherwise, it is identical to the original length.
278     */
279    int32_t length;
280
281    /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
282     * marks are allowed to be inserted in one of the reordering mode, the
283     * length of the result string may be different from the processed length.
284     */
285    int32_t resultLength;
286
287    /* memory sizes in bytes */
288    int32_t dirPropsSize, levelsSize, openingsSize, parasSize, runsSize, isolatesSize;
289
290    /* allocated memory */
291    DirProp *dirPropsMemory;
292    UBiDiLevel *levelsMemory;
293    Opening *openingsMemory;
294    Para *parasMemory;
295    Run *runsMemory;
296    Isolate *isolatesMemory;
297
298    /* indicators for whether memory may be allocated after ubidi_open() */
299    UBool mayAllocateText, mayAllocateRuns;
300
301    /* arrays with one value per text-character */
302    DirProp *dirProps;
303    UBiDiLevel *levels;
304
305    /* are we performing an approximation of the "inverse BiDi" algorithm? */
306    UBool isInverse;
307
308    /* are we using the basic algorithm or its variation? */
309    UBiDiReorderingMode reorderingMode;
310
311    /* UBIDI_REORDER_xxx values must be ordered so that all the regular
312     * logical to visual modes come first, and all inverse BiDi modes
313     * come last.
314     */
315    #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL    UBIDI_REORDER_NUMBERS_SPECIAL
316
317    /* bitmask for reordering options */
318    uint32_t reorderingOptions;
319
320    /* must block separators receive level 0? */
321    UBool orderParagraphsLTR;
322
323    /* the paragraph level */
324    UBiDiLevel paraLevel;
325    /* original paraLevel when contextual */
326    /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
327    UBiDiLevel defaultParaLevel;
328
329    /* context data */
330    const UChar *prologue;
331    int32_t proLength;
332    const UChar *epilogue;
333    int32_t epiLength;
334
335    /* the following is set in ubidi_setPara, used in processPropertySeq */
336    const struct ImpTabPair * pImpTabPair;  /* pointer to levels state table pair */
337
338    /* the overall paragraph or line directionality - see UBiDiDirection */
339    UBiDiDirection direction;
340
341    /* flags is a bit set for which directional properties are in the text */
342    Flags flags;
343
344    /* lastArabicPos is index to the last AL in the text, -1 if none */
345    int32_t lastArabicPos;
346
347    /* characters after trailingWSStart are WS and are */
348    /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
349    int32_t trailingWSStart;
350
351    /* fields for paragraph handling */
352    int32_t paraCount;                  /* set in getDirProps() */
353    /* filled in getDirProps() */
354    Para *paras;
355
356    /* for relatively short text, we only need a tiny array of paras (no malloc()) */
357    Para simpleParas[SIMPLE_PARAS_SIZE];
358
359    /* fields for line reordering */
360    int32_t runCount;     /* ==-1: runs not set up yet */
361    Run *runs;
362
363    /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
364    Run simpleRuns[1];
365
366    /* maximum or current nesting depth of isolate sequences */
367    /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
368       nesting encountered.
369       Within resolveImplicitLevels(), this is the index of the current isolates
370       stack entry. */
371    int32_t isolateCount;
372    Isolate *isolates;
373
374    /* for simple text, have a small stack (no malloc()) */
375    Isolate simpleIsolates[SIMPLE_ISOLATES_SIZE];
376
377    /* for inverse Bidi with insertion of directional marks */
378    InsertPoints insertPoints;
379
380    /* for option UBIDI_OPTION_REMOVE_CONTROLS */
381    int32_t controlCount;
382
383    /* for Bidi class callback */
384    UBiDiClassCallback *fnClassCallback;    /* action pointer */
385    const void *coClassCallback;            /* context pointer */
386};
387
388#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
389#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
390
391typedef union {
392    DirProp *dirPropsMemory;
393    UBiDiLevel *levelsMemory;
394    Opening *openingsMemory;
395    Para *parasMemory;
396    Run *runsMemory;
397    Isolate *isolatesMemory;
398} BidiMemoryForAllocation;
399
400/* Macros for initial checks at function entry */
401#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue)   \
402        if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue
403#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue)   \
404        if(!IS_VALID_PARA(bidi)) {  \
405            errcode=U_INVALID_STATE_ERROR;  \
406            return retvalue;                \
407        }
408#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue)   \
409        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
410            errcode=U_INVALID_STATE_ERROR;  \
411            return retvalue;                \
412        }
413#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue)   \
414        if((arg)<(start) || (arg)>=(limit)) {       \
415            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
416            return retvalue;                        \
417        }
418
419#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode)   \
420        if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return
421#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode)   \
422        if(!IS_VALID_PARA(bidi)) {  \
423            errcode=U_INVALID_STATE_ERROR;  \
424            return;                \
425        }
426#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode)   \
427        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
428            errcode=U_INVALID_STATE_ERROR;  \
429            return;                \
430        }
431#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode)   \
432        if((arg)<(start) || (arg)>=(limit)) {       \
433            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
434            return;                        \
435        }
436
437/* helper function to (re)allocate memory if allowed */
438U_CFUNC UBool
439ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
440
441/* helper macros for each allocated array in UBiDi */
442#define getDirPropsMemory(pBiDi, length) \
443        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
444                        (pBiDi)->mayAllocateText, (length))
445
446#define getLevelsMemory(pBiDi, length) \
447        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
448                        (pBiDi)->mayAllocateText, (length))
449
450#define getRunsMemory(pBiDi, length) \
451        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
452                        (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))
453
454/* additional macros used by ubidi_open() - always allow allocation */
455#define getInitialDirPropsMemory(pBiDi, length) \
456        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
457                        TRUE, (length))
458
459#define getInitialLevelsMemory(pBiDi, length) \
460        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
461                        TRUE, (length))
462
463#define getInitialOpeningsMemory(pBiDi, length) \
464        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \
465                        TRUE, (length)*sizeof(Opening))
466
467#define getInitialParasMemory(pBiDi, length) \
468        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
469                        TRUE, (length)*sizeof(Para))
470
471#define getInitialRunsMemory(pBiDi, length) \
472        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
473                        TRUE, (length)*sizeof(Run))
474
475#define getInitialIsolatesMemory(pBiDi, length) \
476        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \
477                        TRUE, (length)*sizeof(Isolate))
478
479#endif
480
481#endif
482