1/*
2******************************************************************************
3*
4*   Copyright (C) 1999-2014, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  ubidi.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 1999jul27
14*   created by: Markus W. Scherer, updated by Matitiahu Allouche
15*
16*/
17
18#include "cmemory.h"
19#include "unicode/utypes.h"
20#include "unicode/ustring.h"
21#include "unicode/uchar.h"
22#include "unicode/ubidi.h"
23#include "unicode/utf16.h"
24#include "ubidi_props.h"
25#include "ubidiimp.h"
26#include "uassert.h"
27
28/*
29 * General implementation notes:
30 *
31 * Throughout the implementation, there are comments like (W2) that refer to
32 * rules of the BiDi algorithm, in this example to the second rule of the
33 * resolution of weak types.
34 *
35 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
36 * character according to UTF-16, the second UChar gets the directional property of
37 * the entire character assigned, while the first one gets a BN, a boundary
38 * neutral, type, which is ignored by most of the algorithm according to
39 * rule (X9) and the implementation suggestions of the BiDi algorithm.
40 *
41 * Later, adjustWSLevels() will set the level for each BN to that of the
42 * following character (UChar), which results in surrogate pairs getting the
43 * same level on each of their surrogates.
44 *
45 * In a UTF-8 implementation, the same thing could be done: the last byte of
46 * a multi-byte sequence would get the "real" property, while all previous
47 * bytes of that sequence would get BN.
48 *
49 * It is not possible to assign all those parts of a character the same real
50 * property because this would fail in the resolution of weak types with rules
51 * that look at immediately surrounding types.
52 *
53 * As a related topic, this implementation does not remove Boundary Neutral
54 * types from the input, but ignores them wherever this is relevant.
55 * For example, the loop for the resolution of the weak types reads
56 * types until it finds a non-BN.
57 * Also, explicit embedding codes are neither changed into BN nor removed.
58 * They are only treated the same way real BNs are.
59 * As stated before, adjustWSLevels() takes care of them at the end.
60 * For the purpose of conformance, the levels of all these codes
61 * do not matter.
62 *
63 * Note that this implementation modifies the dirProps
64 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
65 * X6, N0 (replace paired brackets by L or R).
66 *
67 * In this implementation, the resolution of weak types (W1 to W6),
68 * neutrals (N1 and N2), and the assignment of the resolved level (In)
69 * are all done in one single loop, in resolveImplicitLevels().
70 * Changes of dirProp values are done on the fly, without writing
71 * them back to the dirProps array.
72 *
73 *
74 * This implementation contains code that allows to bypass steps of the
75 * algorithm that are not needed on the specific paragraph
76 * in order to speed up the most common cases considerably,
77 * like text that is entirely LTR, or RTL text without numbers.
78 *
79 * Most of this is done by setting a bit for each directional property
80 * in a flags variable and later checking for whether there are
81 * any LTR characters or any RTL characters, or both, whether
82 * there are any explicit embedding codes, etc.
83 *
84 * If the (Xn) steps are performed, then the flags are re-evaluated,
85 * because they will then not contain the embedding codes any more
86 * and will be adjusted for override codes, so that subsequently
87 * more bypassing may be possible than what the initial flags suggested.
88 *
89 * If the text is not mixed-directional, then the
90 * algorithm steps for the weak type resolution are not performed,
91 * and all levels are set to the paragraph level.
92 *
93 * If there are no explicit embedding codes, then the (Xn) steps
94 * are not performed.
95 *
96 * If embedding levels are supplied as a parameter, then all
97 * explicit embedding codes are ignored, and the (Xn) steps
98 * are not performed.
99 *
100 * White Space types could get the level of the run they belong to,
101 * and are checked with a test of (flags&MASK_EMBEDDING) to
102 * consider if the paragraph direction should be considered in
103 * the flags variable.
104 *
105 * If there are no White Space types in the paragraph, then
106 * (L1) is not necessary in adjustWSLevels().
107 */
108
109/* to avoid some conditional statements, use tiny constant arrays */
110static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
111static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
112static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
113
114#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
115#define DIRPROP_FLAG_E(level)  flagE[(level)&1]
116#define DIRPROP_FLAG_O(level)  flagO[(level)&1]
117
118#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
119
120#define NO_OVERRIDE(level)  ((level)&~UBIDI_LEVEL_OVERRIDE)
121
122/* UBiDi object management -------------------------------------------------- */
123
124U_CAPI UBiDi * U_EXPORT2
125ubidi_open(void)
126{
127    UErrorCode errorCode=U_ZERO_ERROR;
128    return ubidi_openSized(0, 0, &errorCode);
129}
130
131U_CAPI UBiDi * U_EXPORT2
132ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
133    UBiDi *pBiDi;
134
135    /* check the argument values */
136    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
137        return NULL;
138    } else if(maxLength<0 || maxRunCount<0) {
139        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
140        return NULL;    /* invalid arguments */
141    }
142
143    /* allocate memory for the object */
144    pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
145    if(pBiDi==NULL) {
146        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
147        return NULL;
148    }
149
150    /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
151    uprv_memset(pBiDi, 0, sizeof(UBiDi));
152
153    /* get BiDi properties */
154    pBiDi->bdp=ubidi_getSingleton();
155
156    /* allocate memory for arrays as requested */
157    if(maxLength>0) {
158        if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
159            !getInitialLevelsMemory(pBiDi, maxLength)
160        ) {
161            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
162        }
163    } else {
164        pBiDi->mayAllocateText=TRUE;
165    }
166
167    if(maxRunCount>0) {
168        if(maxRunCount==1) {
169            /* use simpleRuns[] */
170            pBiDi->runsSize=sizeof(Run);
171        } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
172            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
173        }
174    } else {
175        pBiDi->mayAllocateRuns=TRUE;
176    }
177
178    if(U_SUCCESS(*pErrorCode)) {
179        return pBiDi;
180    } else {
181        ubidi_close(pBiDi);
182        return NULL;
183    }
184}
185
186/*
187 * We are allowed to allocate memory if memory==NULL or
188 * mayAllocate==TRUE for each array that we need.
189 * We also try to grow memory as needed if we
190 * allocate it.
191 *
192 * Assume sizeNeeded>0.
193 * If *pMemory!=NULL, then assume *pSize>0.
194 *
195 * ### this realloc() may unnecessarily copy the old data,
196 * which we know we don't need any more;
197 * is this the best way to do this??
198 */
199U_CFUNC UBool
200ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
201    void **pMemory = (void **)bidiMem;
202    /* check for existing memory */
203    if(*pMemory==NULL) {
204        /* we need to allocate memory */
205        if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
206            *pSize=sizeNeeded;
207            return TRUE;
208        } else {
209            return FALSE;
210        }
211    } else {
212        if(sizeNeeded<=*pSize) {
213            /* there is already enough memory */
214            return TRUE;
215        }
216        else if(!mayAllocate) {
217            /* not enough memory, and we must not allocate */
218            return FALSE;
219        } else {
220            /* we try to grow */
221            void *memory;
222            /* in most cases, we do not need the copy-old-data part of
223             * realloc, but it is needed when adding runs using getRunsMemory()
224             * in setParaRunsOnly()
225             */
226            if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
227                *pMemory=memory;
228                *pSize=sizeNeeded;
229                return TRUE;
230            } else {
231                /* we failed to grow */
232                return FALSE;
233            }
234        }
235    }
236}
237
238U_CAPI void U_EXPORT2
239ubidi_close(UBiDi *pBiDi) {
240    if(pBiDi!=NULL) {
241        pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */
242        if(pBiDi->dirPropsMemory!=NULL) {
243            uprv_free(pBiDi->dirPropsMemory);
244        }
245        if(pBiDi->levelsMemory!=NULL) {
246            uprv_free(pBiDi->levelsMemory);
247        }
248        if(pBiDi->openingsMemory!=NULL) {
249            uprv_free(pBiDi->openingsMemory);
250        }
251        if(pBiDi->parasMemory!=NULL) {
252            uprv_free(pBiDi->parasMemory);
253        }
254        if(pBiDi->runsMemory!=NULL) {
255            uprv_free(pBiDi->runsMemory);
256        }
257        if(pBiDi->isolatesMemory!=NULL) {
258            uprv_free(pBiDi->isolatesMemory);
259        }
260        if(pBiDi->insertPoints.points!=NULL) {
261            uprv_free(pBiDi->insertPoints.points);
262        }
263
264        uprv_free(pBiDi);
265    }
266}
267
268/* set to approximate "inverse BiDi" ---------------------------------------- */
269
270U_CAPI void U_EXPORT2
271ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
272    if(pBiDi!=NULL) {
273        pBiDi->isInverse=isInverse;
274        pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
275                                          : UBIDI_REORDER_DEFAULT;
276    }
277}
278
279U_CAPI UBool U_EXPORT2
280ubidi_isInverse(UBiDi *pBiDi) {
281    if(pBiDi!=NULL) {
282        return pBiDi->isInverse;
283    } else {
284        return FALSE;
285    }
286}
287
288/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
289 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
290 * concept of RUNS_ONLY which is a double operation.
291 * It could be advantageous to divide this into 3 concepts:
292 * a) Operation: direct / inverse / RUNS_ONLY
293 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
294 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
295 * This would allow combinations not possible today like RUNS_ONLY with
296 * NUMBERS_SPECIAL.
297 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
298 * REMOVE_CONTROLS for the inverse step.
299 * Not all combinations would be supported, and probably not all do make sense.
300 * This would need to document which ones are supported and what are the
301 * fallbacks for unsupported combinations.
302 */
303U_CAPI void U_EXPORT2
304ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
305    if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
306                        && (reorderingMode < UBIDI_REORDER_COUNT)) {
307        pBiDi->reorderingMode = reorderingMode;
308        pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
309    }
310}
311
312U_CAPI UBiDiReorderingMode U_EXPORT2
313ubidi_getReorderingMode(UBiDi *pBiDi) {
314    if (pBiDi!=NULL) {
315        return pBiDi->reorderingMode;
316    } else {
317        return UBIDI_REORDER_DEFAULT;
318    }
319}
320
321U_CAPI void U_EXPORT2
322ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
323    if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
324        reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
325    }
326    if (pBiDi!=NULL) {
327        pBiDi->reorderingOptions=reorderingOptions;
328    }
329}
330
331U_CAPI uint32_t U_EXPORT2
332ubidi_getReorderingOptions(UBiDi *pBiDi) {
333    if (pBiDi!=NULL) {
334        return pBiDi->reorderingOptions;
335    } else {
336        return 0;
337    }
338}
339
340U_CAPI UBiDiDirection U_EXPORT2
341ubidi_getBaseDirection(const UChar *text,
342int32_t length){
343
344    int32_t i;
345    UChar32 uchar;
346    UCharDirection dir;
347
348    if( text==NULL || length<-1 ){
349        return UBIDI_NEUTRAL;
350    }
351
352    if(length==-1) {
353        length=u_strlen(text);
354    }
355
356    for( i = 0 ; i < length; ) {
357        /* i is incremented by U16_NEXT */
358        U16_NEXT(text, i, length, uchar);
359        dir = u_charDirection(uchar);
360        if( dir == U_LEFT_TO_RIGHT )
361                return UBIDI_LTR;
362        if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
363                return UBIDI_RTL;
364    }
365    return UBIDI_NEUTRAL;
366}
367
368/* perform (P2)..(P3) ------------------------------------------------------- */
369
370/**
371 * Returns the directionality of the first strong character
372 * after the last B in prologue, if any.
373 * Requires prologue!=null.
374 */
375static DirProp
376firstL_R_AL(UBiDi *pBiDi) {
377    const UChar *text=pBiDi->prologue;
378    int32_t length=pBiDi->proLength;
379    int32_t i;
380    UChar32 uchar;
381    DirProp dirProp, result=ON;
382    for(i=0; i<length; ) {
383        /* i is incremented by U16_NEXT */
384        U16_NEXT(text, i, length, uchar);
385        dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
386        if(result==ON) {
387            if(dirProp==L || dirProp==R || dirProp==AL) {
388                result=dirProp;
389            }
390        } else {
391            if(dirProp==B) {
392                result=ON;
393            }
394        }
395    }
396    return result;
397}
398
399/*
400 * Check that there are enough entries in the array pointed to by pBiDi->paras
401 */
402static UBool
403checkParaCount(UBiDi *pBiDi) {
404    int32_t count=pBiDi->paraCount;
405    if(pBiDi->paras==pBiDi->simpleParas) {
406        if(count<=SIMPLE_PARAS_SIZE)
407            return TRUE;
408        if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_SIZE * 2))
409            return FALSE;
410        pBiDi->paras=pBiDi->parasMemory;
411        uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_SIZE * sizeof(Para));
412        return TRUE;
413    }
414    if(!getInitialParasMemory(pBiDi, count * 2))
415        return FALSE;
416    pBiDi->paras=pBiDi->parasMemory;
417    return TRUE;
418}
419
420/*
421 * Get the directional properties for the text, calculate the flags bit-set, and
422 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
423 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
424 * When encountering an FSI, it is initially replaced with an LRI, which is the
425 * default. Only if a strong R or AL is found within its scope will the LRI be
426 * replaced by an RLI.
427 */
428static UBool
429getDirProps(UBiDi *pBiDi) {
430    const UChar *text=pBiDi->text;
431    DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
432
433    int32_t i=0, originalLength=pBiDi->originalLength;
434    Flags flags=0;      /* collect all directionalities in the text */
435    UChar32 uchar;
436    DirProp dirProp=0, defaultParaLevel=0;  /* initialize to avoid compiler warnings */
437    UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
438    /* for inverse BiDi, the default para level is set to RTL if there is a
439       strong R or AL character at either end of the text                            */
440    UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
441            (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
442             pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
443    int32_t lastArabicPos=-1;
444    int32_t controlCount=0;
445    UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
446                                       UBIDI_OPTION_REMOVE_CONTROLS);
447
448    typedef enum {
449         NOT_SEEKING_STRONG,            /* 0: not contextual paraLevel, not after FSI */
450         SEEKING_STRONG_FOR_PARA,       /* 1: looking for first strong char in para */
451         SEEKING_STRONG_FOR_FSI,        /* 2: looking for first strong after FSI */
452         LOOKING_FOR_PDI                /* 3: found strong after FSI, looking for PDI */
453    } State;
454    State state;
455    DirProp lastStrong=ON;              /* for default level & inverse BiDi */
456    /* The following stacks are used to manage isolate sequences. Those
457       sequences may be nested, but obviously never more deeply than the
458       maximum explicit embedding level.
459       lastStack is the index of the last used entry in the stack. A value of -1
460       means that there is no open isolate sequence.
461       lastStack is reset to -1 on paragraph boundaries. */
462    /* The following stack contains the position of the initiator of
463       each open isolate sequence */
464    int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
465    /* The following stack contains the last known state before
466       encountering the initiator of an isolate sequence */
467    int8_t  previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
468    int32_t stackLast=-1;
469
470    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
471        pBiDi->length=0;
472    defaultParaLevel=pBiDi->paraLevel&1;
473    if(isDefaultLevel) {
474        pBiDi->paras[0].level=defaultParaLevel;
475        lastStrong=defaultParaLevel;
476        if(pBiDi->proLength>0 &&                    /* there is a prologue */
477           (dirProp=firstL_R_AL(pBiDi))!=ON) {  /* with a strong character */
478            if(dirProp==L)
479                pBiDi->paras[0].level=0;    /* set the default para level */
480            else
481                pBiDi->paras[0].level=1;    /* set the default para level */
482            state=NOT_SEEKING_STRONG;
483        } else {
484            state=SEEKING_STRONG_FOR_PARA;
485        }
486    } else {
487        pBiDi->paras[0].level=pBiDi->paraLevel;
488        state=NOT_SEEKING_STRONG;
489    }
490    /* count paragraphs and determine the paragraph level (P2..P3) */
491    /*
492     * see comment in ubidi.h:
493     * the UBIDI_DEFAULT_XXX values are designed so that
494     * their bit 0 alone yields the intended default
495     */
496    for( /* i=0 above */ ; i<originalLength; ) {
497        /* i is incremented by U16_NEXT */
498        U16_NEXT(text, i, originalLength, uchar);
499        flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
500        dirProps[i-1]=dirProp;
501        if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
502            flags|=DIRPROP_FLAG(BN);
503            dirProps[i-2]=BN;
504        }
505        if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
506            controlCount++;
507        if(dirProp==L) {
508            if(state==SEEKING_STRONG_FOR_PARA) {
509                pBiDi->paras[pBiDi->paraCount-1].level=0;
510                state=NOT_SEEKING_STRONG;
511            }
512            else if(state==SEEKING_STRONG_FOR_FSI) {
513                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
514                    /* no need for next statement, already set by default */
515                    /* dirProps[isolateStartStack[stackLast]]=LRI; */
516                    flags|=DIRPROP_FLAG(LRI);
517                }
518                state=LOOKING_FOR_PDI;
519            }
520            lastStrong=L;
521            continue;
522        }
523        if(dirProp==R || dirProp==AL) {
524            if(state==SEEKING_STRONG_FOR_PARA) {
525                pBiDi->paras[pBiDi->paraCount-1].level=1;
526                state=NOT_SEEKING_STRONG;
527            }
528            else if(state==SEEKING_STRONG_FOR_FSI) {
529                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
530                    dirProps[isolateStartStack[stackLast]]=RLI;
531                    flags|=DIRPROP_FLAG(RLI);
532                }
533                state=LOOKING_FOR_PDI;
534            }
535            lastStrong=R;
536            if(dirProp==AL)
537                lastArabicPos=i-1;
538            continue;
539        }
540        if(dirProp>=FSI && dirProp<=RLI) {  /* FSI, LRI or RLI */
541            stackLast++;
542            if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
543                isolateStartStack[stackLast]=i-1;
544                previousStateStack[stackLast]=state;
545            }
546            if(dirProp==FSI) {
547                dirProps[i-1]=LRI;      /* default if no strong char */
548                state=SEEKING_STRONG_FOR_FSI;
549            }
550            else
551                state=LOOKING_FOR_PDI;
552            continue;
553        }
554        if(dirProp==PDI) {
555            if(state==SEEKING_STRONG_FOR_FSI) {
556                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
557                    /* no need for next statement, already set by default */
558                    /* dirProps[isolateStartStack[stackLast]]=LRI; */
559                    flags|=DIRPROP_FLAG(LRI);
560                }
561            }
562            if(stackLast>=0) {
563                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
564                    state=previousStateStack[stackLast];
565                stackLast--;
566            }
567            continue;
568        }
569        if(dirProp==B) {
570            if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
571                continue;
572            pBiDi->paras[pBiDi->paraCount-1].limit=i;
573            if(isDefaultLevelInverse && lastStrong==R)
574                pBiDi->paras[pBiDi->paraCount-1].level=1;
575            if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
576                /* When streaming, we only process whole paragraphs
577                   thus some updates are only done on paragraph boundaries */
578                pBiDi->length=i;        /* i is index to next character */
579                pBiDi->controlCount=controlCount;
580            }
581            if(i<originalLength) {              /* B not last char in text */
582                pBiDi->paraCount++;
583                if(checkParaCount(pBiDi)==FALSE)    /* not enough memory for a new para entry */
584                    return FALSE;
585                if(isDefaultLevel) {
586                    pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
587                    state=SEEKING_STRONG_FOR_PARA;
588                    lastStrong=defaultParaLevel;
589                } else {
590                    pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
591                    state=NOT_SEEKING_STRONG;
592                }
593                stackLast=-1;
594            }
595            continue;
596        }
597    }
598    /* Ignore still open isolate sequences with overflow */
599    if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
600        stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
601        state=SEEKING_STRONG_FOR_FSI;   /* to be on the safe side */
602    }
603    /* Resolve direction of still unresolved open FSI sequences */
604    while(stackLast>=0) {
605        if(state==SEEKING_STRONG_FOR_FSI) {
606            /* no need for next statement, already set by default */
607            /* dirProps[isolateStartStack[stackLast]]=LRI; */
608            flags|=DIRPROP_FLAG(LRI);
609            break;
610        }
611        state=previousStateStack[stackLast];
612        stackLast--;
613    }
614    /* When streaming, ignore text after the last paragraph separator */
615    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
616        if(pBiDi->length<originalLength)
617            pBiDi->paraCount--;
618    } else {
619        pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
620        pBiDi->controlCount=controlCount;
621    }
622    /* For inverse bidi, default para direction is RTL if there is
623       a strong R or AL at either end of the paragraph */
624    if(isDefaultLevelInverse && lastStrong==R) {
625        pBiDi->paras[pBiDi->paraCount-1].level=1;
626    }
627    if(isDefaultLevel) {
628        pBiDi->paraLevel=pBiDi->paras[0].level;
629    }
630    /* The following is needed to resolve the text direction for default level
631       paragraphs containing no strong character */
632    for(i=0; i<pBiDi->paraCount; i++)
633        flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
634
635    if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
636        flags|=DIRPROP_FLAG(L);
637    }
638    pBiDi->flags=flags;
639    pBiDi->lastArabicPos=lastArabicPos;
640    return TRUE;
641}
642
643/* determine the paragraph level at position index */
644U_CFUNC UBiDiLevel
645ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
646    int32_t i;
647    for(i=0; i<pBiDi->paraCount; i++)
648        if(pindex<pBiDi->paras[i].limit)
649            break;
650    if(i>=pBiDi->paraCount)
651        i=pBiDi->paraCount-1;
652    return (UBiDiLevel)(pBiDi->paras[i].level);
653}
654
655/* Functions for handling paired brackets ----------------------------------- */
656
657/* In the isoRuns array, the first entry is used for text outside of any
658   isolate sequence.  Higher entries are used for each more deeply nested
659   isolate sequence. isoRunLast is the index of the last used entry.  The
660   openings array is used to note the data of opening brackets not yet
661   matched by a closing bracket, or matched but still susceptible to change
662   level.
663   Each isoRun entry contains the index of the first and
664   one-after-last openings entries for pending opening brackets it
665   contains.  The next openings entry to use is the one-after-last of the
666   most deeply nested isoRun entry.
667   isoRun entries also contain their current embedding level and the last
668   encountered strong character, since these will be needed to resolve
669   the level of paired brackets.  */
670
671static void
672bracketInit(UBiDi *pBiDi, BracketData *bd) {
673    bd->pBiDi=pBiDi;
674    bd->isoRunLast=0;
675    bd->isoRuns[0].start=0;
676    bd->isoRuns[0].limit=0;
677    bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
678    bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1;
679    bd->isoRuns[0].contextPos=0;
680    if(pBiDi->openingsMemory) {
681        bd->openings=pBiDi->openingsMemory;
682        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
683    } else {
684        bd->openings=bd->simpleOpenings;
685        bd->openingsCount=SIMPLE_OPENINGS_SIZE;
686    }
687    bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
688                         bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
689}
690
691/* paragraph boundary */
692static void
693bracketProcessB(BracketData *bd, UBiDiLevel level) {
694    bd->isoRunLast=0;
695    bd->isoRuns[0].limit=0;
696    bd->isoRuns[0].level=level;
697    bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=level&1;
698    bd->isoRuns[0].contextPos=0;
699}
700
701/* LRE, LRO, RLE, RLO, PDF */
702static void
703bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
704                       UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
705    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
706    DirProp *dirProps=bd->pBiDi->dirProps;
707    if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO)  /* after an isolate */
708        return;
709    if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel))   /* not a PDF */
710        contextLevel=embeddingLevel;
711    pLastIsoRun->limit=pLastIsoRun->start;
712    pLastIsoRun->level=embeddingLevel;
713    pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=contextLevel&1;
714    pLastIsoRun->contextPos=lastCcPos;
715}
716
717/* LRI or RLI */
718static void
719bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
720    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
721    int16_t lastLimit;
722    pLastIsoRun->lastBase=ON;
723    lastLimit=pLastIsoRun->limit;
724    bd->isoRunLast++;
725    pLastIsoRun++;
726    pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
727    pLastIsoRun->level=level;
728    pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=level&1;
729    pLastIsoRun->contextPos=0;
730}
731
732/* PDI */
733static void
734bracketProcessPDI(BracketData *bd) {
735    IsoRun *pLastIsoRun;
736    bd->isoRunLast--;
737    pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
738    pLastIsoRun->lastBase=ON;
739}
740
741/* newly found opening bracket: create an openings entry */
742static UBool                            /* return TRUE if success */
743bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
744    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
745    Opening *pOpening;
746    if(pLastIsoRun->limit>=bd->openingsCount) {  /* no available new entry */
747        UBiDi *pBiDi=bd->pBiDi;
748        if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
749            return FALSE;
750        if(bd->openings==bd->simpleOpenings)
751            uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
752                        SIMPLE_OPENINGS_SIZE * sizeof(Opening));
753        bd->openings=pBiDi->openingsMemory;     /* may have changed */
754        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
755    }
756    pOpening=&bd->openings[pLastIsoRun->limit];
757    pOpening->position=position;
758    pOpening->match=match;
759    pOpening->contextDir=pLastIsoRun->contextDir;
760    pOpening->contextPos=pLastIsoRun->contextPos;
761    pOpening->flags=0;
762    pLastIsoRun->limit++;
763    return TRUE;
764}
765
766/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
767static void
768fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
769    /* This function calls itself recursively */
770    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
771    Opening *qOpening;
772    DirProp *dirProps=bd->pBiDi->dirProps;
773    int32_t k, openingPosition, closingPosition;
774    for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
775        if(qOpening->match>=0)      /* not an N0c match */
776            continue;
777        if(newPropPosition<qOpening->contextPos)
778            break;
779        if(newPropPosition>=qOpening->position)
780            continue;
781        if(newProp==qOpening->contextDir)
782            break;
783        openingPosition=qOpening->position;
784        dirProps[openingPosition]=newProp;
785        closingPosition=-(qOpening->match);
786        dirProps[closingPosition]=newProp;
787        qOpening->match=0;                      /* prevent further changes */
788        fixN0c(bd, k, openingPosition, newProp);
789        fixN0c(bd, k, closingPosition, newProp);
790    }
791}
792
793/* process closing bracket */
794static DirProp              /* return L or R if N0b or N0c, ON if N0d */
795bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
796    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
797    Opening *pOpening, *qOpening;
798    UBiDiDirection direction;
799    UBool stable;
800    DirProp newProp;
801    pOpening=&bd->openings[openIdx];
802    direction=pLastIsoRun->level&1;
803    stable=TRUE;            /* assume stable until proved otherwise */
804
805    /* The stable flag is set when brackets are paired and their
806       level is resolved and cannot be changed by what will be
807       found later in the source string.
808       An unstable match can occur only when applying N0c, where
809       the resolved level depends on the preceding context, and
810       this context may be affected by text occurring later.
811       Example: RTL paragraph containing:  abc[(latin) HEBREW]
812       When the closing parenthesis is encountered, it appears
813       that N0c1 must be applied since 'abc' sets an opposite
814       direction context and both parentheses receive level 2.
815       However, when the closing square bracket is processed,
816       N0b applies because of 'HEBREW' being included within the
817       brackets, thus the square brackets are treated like R and
818       receive level 1. However, this changes the preceding
819       context of the opening parenthesis, and it now appears
820       that N0c2 must be applied to the parentheses rather than
821       N0c1. */
822
823    if((direction==0 && pOpening->flags&FOUND_L) ||
824       (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
825        newProp=direction;
826    }
827    else if(pOpening->flags&(FOUND_L|FOUND_R)) {    /* N0c */
828        /* it is stable if there is no containing pair or in
829           conditions too complicated and not worth checking */
830        stable=(openIdx==pLastIsoRun->start);
831        if(direction!=pOpening->contextDir)
832            newProp=pOpening->contextDir;           /* N0c1 */
833        else
834            newProp=direction;                      /* N0c2 */
835    } else {
836        /* forget this and any brackets nested within this pair */
837        pLastIsoRun->limit=openIdx;
838        return ON;                                  /* N0d */
839    }
840    bd->pBiDi->dirProps[pOpening->position]=newProp;
841    bd->pBiDi->dirProps[position]=newProp;
842    /* Update nested N0c pairs that may be affected */
843    fixN0c(bd, openIdx, pOpening->position, newProp);
844    if(stable) {
845        pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */
846        /* remove lower located synonyms if any */
847        while(pLastIsoRun->limit>pLastIsoRun->start &&
848              bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
849            pLastIsoRun->limit--;
850    } else {
851        int32_t k;
852        pOpening->match=-position;
853        /* neutralize lower located synonyms if any */
854        k=openIdx-1;
855        while(k>=pLastIsoRun->start &&
856              bd->openings[k].position==pOpening->position)
857            bd->openings[k--].match=0;
858        /* neutralize any unmatched opening between the current pair;
859           this will also neutralize higher located synonyms if any */
860        for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
861            qOpening=&bd->openings[k];
862            if(qOpening->position>=position)
863                break;
864            if(qOpening->match>0)
865                qOpening->match=0;
866        }
867    }
868    return newProp;
869}
870
871/* handle strong characters, digits and candidates for closing brackets */
872static UBool                            /* return TRUE if success */
873bracketProcessChar(BracketData *bd, int32_t position) {
874    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
875    DirProp *dirProps, dirProp, newProp;
876    UBiDiLevel level;
877    dirProps=bd->pBiDi->dirProps;
878    dirProp=dirProps[position];
879    if(dirProp==ON) {
880        UChar c, match;
881        int32_t idx;
882        /* First see if it is a matching closing bracket. Hopefully, this is
883           more efficient than checking if it is a closing bracket at all */
884        c=bd->pBiDi->text[position];
885        for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
886            if(bd->openings[idx].match!=c)
887                continue;
888            /* We have a match */
889            newProp=bracketProcessClosing(bd, idx, position);
890            if(newProp==ON) {           /* N0d */
891                c=0;        /* prevent handling as an opening */
892                break;
893            }
894            pLastIsoRun->lastBase=ON;
895            pLastIsoRun->contextDir=newProp;
896            pLastIsoRun->contextPos=position;
897            level=bd->pBiDi->levels[position];
898            if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
899                uint16_t flag;
900                int32_t i;
901                newProp=level&1;
902                pLastIsoRun->lastStrong=newProp;
903                flag=DIRPROP_FLAG(newProp);
904                for(i=pLastIsoRun->start; i<idx; i++)
905                    bd->openings[i].flags|=flag;
906                /* matching brackets are not overridden by LRO/RLO */
907                bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
908            }
909            /* matching brackets are not overridden by LRO/RLO */
910            bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
911            return TRUE;
912        }
913        /* We get here only if the ON character is not a matching closing
914           bracket or it is a case of N0d */
915        /* Now see if it is an opening bracket */
916        if(c)
917            match=u_getBidiPairedBracket(c);    /* get the matching char */
918        else
919            match=0;
920        if(match!=c &&                  /* has a matching char */
921           ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
922            /* special case: process synonyms
923               create an opening entry for each synonym */
924            if(match==0x232A) {     /* RIGHT-POINTING ANGLE BRACKET */
925                if(!bracketAddOpening(bd, 0x3009, position))
926                    return FALSE;
927            }
928            else if(match==0x3009) {         /* RIGHT ANGLE BRACKET */
929                if(!bracketAddOpening(bd, 0x232A, position))
930                    return FALSE;
931            }
932            if(!bracketAddOpening(bd, match, position))
933                return FALSE;
934        }
935    }
936    level=bd->pBiDi->levels[position];
937    if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
938        newProp=level&1;
939        if(dirProp!=S && dirProp!=WS && dirProp!=ON)
940            dirProps[position]=newProp;
941        pLastIsoRun->lastBase=newProp;
942        pLastIsoRun->lastStrong=newProp;
943        pLastIsoRun->contextDir=newProp;
944        pLastIsoRun->contextPos=position;
945    }
946    else if(dirProp<=R || dirProp==AL) {
947        newProp=DIR_FROM_STRONG(dirProp);
948        pLastIsoRun->lastBase=dirProp;
949        pLastIsoRun->lastStrong=dirProp;
950        pLastIsoRun->contextDir=newProp;
951        pLastIsoRun->contextPos=position;
952    }
953    else if(dirProp==EN) {
954        pLastIsoRun->lastBase=EN;
955        if(pLastIsoRun->lastStrong==L) {
956            newProp=L;                  /* W7 */
957            if(!bd->isNumbersSpecial)
958                dirProps[position]=ENL;
959            pLastIsoRun->contextDir=L;
960            pLastIsoRun->contextPos=position;
961        }
962        else {
963            newProp=R;                  /* N0 */
964            if(pLastIsoRun->lastStrong==AL)
965                dirProps[position]=AN;  /* W2 */
966            else
967                dirProps[position]=ENR;
968            pLastIsoRun->contextDir=R;
969            pLastIsoRun->contextPos=position;
970        }
971    }
972    else if(dirProp==AN) {
973        newProp=R;                      /* N0 */
974        pLastIsoRun->lastBase=AN;
975        pLastIsoRun->contextDir=R;
976        pLastIsoRun->contextPos=position;
977    }
978    else if(dirProp==NSM) {
979        /* if the last real char was ON, change NSM to ON so that it
980           will stay ON even if the last real char is a bracket which
981           may be changed to L or R */
982        newProp=pLastIsoRun->lastBase;
983        if(newProp==ON)
984            dirProps[position]=newProp;
985    }
986    else {
987        newProp=dirProp;
988        pLastIsoRun->lastBase=dirProp;
989    }
990    if(newProp<=R || newProp==AL) {
991        int32_t i;
992        uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
993        for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
994            if(position>bd->openings[i].position)
995                bd->openings[i].flags|=flag;
996    }
997    return TRUE;
998}
999
1000/* perform (X1)..(X9) ------------------------------------------------------- */
1001
1002/* determine if the text is mixed-directional or single-directional */
1003static UBiDiDirection
1004directionFromFlags(UBiDi *pBiDi) {
1005    Flags flags=pBiDi->flags;
1006    /* if the text contains AN and neutrals, then some neutrals may become RTL */
1007    if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1008        return UBIDI_LTR;
1009    } else if(!(flags&MASK_LTR)) {
1010        return UBIDI_RTL;
1011    } else {
1012        return UBIDI_MIXED;
1013    }
1014}
1015
1016/*
1017 * Resolve the explicit levels as specified by explicit embedding codes.
1018 * Recalculate the flags to have them reflect the real properties
1019 * after taking the explicit embeddings into account.
1020 *
1021 * The BiDi algorithm is designed to result in the same behavior whether embedding
1022 * levels are externally specified (from "styled text", supposedly the preferred
1023 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1024 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1025 * However, in a real implementation, the removal of these codes and their index
1026 * positions in the plain text is undesirable since it would result in
1027 * reallocated, reindexed text.
1028 * Instead, this implementation leaves the codes in there and just ignores them
1029 * in the subsequent processing.
1030 * In order to get the same reordering behavior, positions with a BN or a not-isolate
1031 * explicit embedding code just get the same level assigned as the last "real"
1032 * character.
1033 *
1034 * Some implementations, not this one, then overwrite some of these
1035 * directionality properties at "real" same-level-run boundaries by
1036 * L or R codes so that the resolution of weak types can be performed on the
1037 * entire paragraph at once instead of having to parse it once more and
1038 * perform that resolution on same-level-runs.
1039 * This limits the scope of the implicit rules in effectively
1040 * the same way as the run limits.
1041 *
1042 * Instead, this implementation does not modify these codes, except for
1043 * paired brackets whose properties (ON) may be replaced by L or R.
1044 * On one hand, the paragraph has to be scanned for same-level-runs, but
1045 * on the other hand, this saves another loop to reset these codes,
1046 * or saves making and modifying a copy of dirProps[].
1047 *
1048 *
1049 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1050 *
1051 *
1052 * Handling the stack of explicit levels (Xn):
1053 *
1054 * With the BiDi stack of explicit levels, as pushed with each
1055 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1056 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1057 *
1058 * In order to have a correct push-pop semantics even in the case of overflows,
1059 * overflow counters and a valid isolate counter are used as described in UAX#9
1060 * section 3.3.2 "Explicit Levels and Directions".
1061 *
1062 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
1063 *
1064 * Returns normally the direction; -1 if there was a memory shortage
1065 *
1066 */
1067static UBiDiDirection
1068resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1069    DirProp *dirProps=pBiDi->dirProps;
1070    UBiDiLevel *levels=pBiDi->levels;
1071    const UChar *text=pBiDi->text;
1072
1073    int32_t i=0, length=pBiDi->length;
1074    Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
1075    DirProp dirProp;
1076    UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1077    UBiDiDirection direction;
1078    pBiDi->isolateCount=0;
1079
1080    if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1081
1082    /* determine if the text is mixed-directional or single-directional */
1083    direction=directionFromFlags(pBiDi);
1084
1085    /* we may not need to resolve any explicit levels */
1086    if((direction!=UBIDI_MIXED)) {
1087        /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1088        return direction;
1089    }
1090    if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1091        /* inverse BiDi: mixed, but all characters are at the same embedding level */
1092        /* set all levels to the paragraph level */
1093        int32_t paraIndex, start, limit;
1094        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1095            if(paraIndex==0)
1096                start=0;
1097            else
1098                start=pBiDi->paras[paraIndex-1].limit;
1099            limit=pBiDi->paras[paraIndex].limit;
1100            level=pBiDi->paras[paraIndex].level;
1101            for(i=start; i<limit; i++)
1102                levels[i]=level;
1103        }
1104        return direction;   /* no bracket matching for inverse BiDi */
1105    }
1106    if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1107        /* no embeddings, set all levels to the paragraph level */
1108        /* we still have to perform bracket matching */
1109        int32_t paraIndex, start, limit;
1110        BracketData bracketData;
1111        bracketInit(pBiDi, &bracketData);
1112        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1113            if(paraIndex==0)
1114                start=0;
1115            else
1116                start=pBiDi->paras[paraIndex-1].limit;
1117            limit=pBiDi->paras[paraIndex].limit;
1118            level=pBiDi->paras[paraIndex].level;
1119            for(i=start; i<limit; i++) {
1120                levels[i]=level;
1121                dirProp=dirProps[i];
1122                if(dirProp==BN)
1123                    continue;
1124                if(dirProp==B) {
1125                    if((i+1)<length) {
1126                        if(text[i]==CR && text[i+1]==LF)
1127                            continue;   /* skip CR when followed by LF */
1128                        bracketProcessB(&bracketData, level);
1129                    }
1130                    continue;
1131                }
1132                if(!bracketProcessChar(&bracketData, i)) {
1133                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1134                    return UBIDI_LTR;
1135                }
1136            }
1137        }
1138        return direction;
1139    }
1140    {
1141        /* continue to perform (Xn) */
1142
1143        /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1144        /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1145        UBiDiLevel embeddingLevel=level, newLevel;
1146        UBiDiLevel previousLevel=level;     /* previous level for regular (not CC) characters */
1147        int32_t lastCcPos=0;                /* index of last effective LRx,RLx, PDx */
1148
1149        /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1150           stackLast points to its current entry. */
1151        uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2];   /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1152                                                        but we need one more entry as base */
1153        uint32_t stackLast=0;
1154        int32_t overflowIsolateCount=0;
1155        int32_t overflowEmbeddingCount=0;
1156        int32_t validIsolateCount=0;
1157        BracketData bracketData;
1158        bracketInit(pBiDi, &bracketData);
1159        stack[0]=level;     /* initialize base entry to para level, no override, no isolate */
1160
1161        /* recalculate the flags */
1162        flags=0;
1163
1164        for(i=0; i<length; ++i) {
1165            dirProp=dirProps[i];
1166            switch(dirProp) {
1167            case LRE:
1168            case RLE:
1169            case LRO:
1170            case RLO:
1171                /* (X2, X3, X4, X5) */
1172                flags|=DIRPROP_FLAG(BN);
1173                levels[i]=previousLevel;
1174                if (dirProp==LRE || dirProp==LRO)
1175                    /* least greater even level */
1176                    newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1177                else
1178                    /* least greater odd level */
1179                    newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1180                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1181                                                         overflowEmbeddingCount==0) {
1182                    lastCcPos=i;
1183                    embeddingLevel=newLevel;
1184                    if(dirProp==LRO || dirProp==RLO)
1185                        embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1186                    stackLast++;
1187                    stack[stackLast]=embeddingLevel;
1188                    /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1189                       since this has already been done for newLevel which is
1190                       the source for embeddingLevel.
1191                     */
1192                } else {
1193                    if(overflowIsolateCount==0)
1194                        overflowEmbeddingCount++;
1195                }
1196                break;
1197            case PDF:
1198                /* (X7) */
1199                flags|=DIRPROP_FLAG(BN);
1200                levels[i]=previousLevel;
1201                /* handle all the overflow cases first */
1202                if(overflowIsolateCount) {
1203                    break;
1204                }
1205                if(overflowEmbeddingCount) {
1206                    overflowEmbeddingCount--;
1207                    break;
1208                }
1209                if(stackLast>0 && stack[stackLast]<ISOLATE) {   /* not an isolate entry */
1210                    lastCcPos=i;
1211                    stackLast--;
1212                    embeddingLevel=(UBiDiLevel)stack[stackLast];
1213                }
1214                break;
1215            case LRI:
1216            case RLI:
1217                flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1218                levels[i]=NO_OVERRIDE(embeddingLevel);
1219                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1220                    bracketProcessBoundary(&bracketData, lastCcPos,
1221                                           previousLevel, embeddingLevel);
1222                    flags|=DIRPROP_FLAG_MULTI_RUNS;
1223                }
1224                previousLevel=embeddingLevel;
1225                /* (X5a, X5b) */
1226                if(dirProp==LRI)
1227                    /* least greater even level */
1228                    newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1229                else
1230                    /* least greater odd level */
1231                    newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1232                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1233                                                         overflowEmbeddingCount==0) {
1234                    flags|=DIRPROP_FLAG(dirProp);
1235                    lastCcPos=i;
1236                    validIsolateCount++;
1237                    if(validIsolateCount>pBiDi->isolateCount)
1238                        pBiDi->isolateCount=validIsolateCount;
1239                    embeddingLevel=newLevel;
1240                    /* we can increment stackLast without checking because newLevel
1241                       will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1242                    stackLast++;
1243                    stack[stackLast]=embeddingLevel+ISOLATE;
1244                    bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1245                } else {
1246                    /* make it WS so that it is handled by adjustWSLevels() */
1247                    dirProps[i]=WS;
1248                    overflowIsolateCount++;
1249                }
1250                break;
1251            case PDI:
1252                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1253                    bracketProcessBoundary(&bracketData, lastCcPos,
1254                                           previousLevel, embeddingLevel);
1255                    flags|=DIRPROP_FLAG_MULTI_RUNS;
1256                }
1257                /* (X6a) */
1258                if(overflowIsolateCount) {
1259                    overflowIsolateCount--;
1260                    /* make it WS so that it is handled by adjustWSLevels() */
1261                    dirProps[i]=WS;
1262                }
1263                else if(validIsolateCount) {
1264                    flags|=DIRPROP_FLAG(PDI);
1265                    lastCcPos=i;
1266                    overflowEmbeddingCount=0;
1267                    while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1268                        stackLast--;                /* until the last isolate entry */
1269                    stackLast--;                    /* pop also the last isolate entry */
1270                    validIsolateCount--;
1271                    bracketProcessPDI(&bracketData);
1272                } else
1273                    /* make it WS so that it is handled by adjustWSLevels() */
1274                    dirProps[i]=WS;
1275                embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1276                flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1277                previousLevel=embeddingLevel;
1278                levels[i]=NO_OVERRIDE(embeddingLevel);
1279                break;
1280            case B:
1281                flags|=DIRPROP_FLAG(B);
1282                levels[i]=GET_PARALEVEL(pBiDi, i);
1283                if((i+1)<length) {
1284                    if(text[i]==CR && text[i+1]==LF)
1285                        break;          /* skip CR when followed by LF */
1286                    overflowEmbeddingCount=overflowIsolateCount=0;
1287                    validIsolateCount=0;
1288                    stackLast=0;
1289                    previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1290                    stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
1291                    bracketProcessB(&bracketData, embeddingLevel);
1292                }
1293                break;
1294            case BN:
1295                /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1296                /* they will get their levels set correctly in adjustWSLevels() */
1297                levels[i]=previousLevel;
1298                flags|=DIRPROP_FLAG(BN);
1299                break;
1300            default:
1301                /* all other types are normal characters and get the "real" level */
1302                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1303                    bracketProcessBoundary(&bracketData, lastCcPos,
1304                                           previousLevel, embeddingLevel);
1305                    flags|=DIRPROP_FLAG_MULTI_RUNS;
1306                    if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1307                        flags|=DIRPROP_FLAG_O(embeddingLevel);
1308                    else
1309                        flags|=DIRPROP_FLAG_E(embeddingLevel);
1310                }
1311                previousLevel=embeddingLevel;
1312                levels[i]=embeddingLevel;
1313                if(!bracketProcessChar(&bracketData, i))
1314                    return -1;
1315                /* the dirProp may have been changed in bracketProcessChar() */
1316                flags|=DIRPROP_FLAG(dirProps[i]);
1317                break;
1318            }
1319        }
1320        if(flags&MASK_EMBEDDING)
1321            flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1322        if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
1323            flags|=DIRPROP_FLAG(L);
1324        /* again, determine if the text is mixed-directional or single-directional */
1325        pBiDi->flags=flags;
1326        direction=directionFromFlags(pBiDi);
1327    }
1328    return direction;
1329}
1330
1331/*
1332 * Use a pre-specified embedding levels array:
1333 *
1334 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1335 * ignore all explicit codes (X9),
1336 * and check all the preset levels.
1337 *
1338 * Recalculate the flags to have them reflect the real properties
1339 * after taking the explicit embeddings into account.
1340 */
1341static UBiDiDirection
1342checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1343    DirProp *dirProps=pBiDi->dirProps;
1344    DirProp dirProp;
1345    UBiDiLevel *levels=pBiDi->levels;
1346    int32_t isolateCount=0;
1347
1348    int32_t i, length=pBiDi->length;
1349    Flags flags=0;  /* collect all directionalities in the text */
1350    UBiDiLevel level;
1351    pBiDi->isolateCount=0;
1352
1353    for(i=0; i<length; ++i) {
1354        level=levels[i];
1355        dirProp=dirProps[i];
1356        if(dirProp==LRI || dirProp==RLI) {
1357            isolateCount++;
1358            if(isolateCount>pBiDi->isolateCount)
1359                pBiDi->isolateCount=isolateCount;
1360        }
1361        else if(dirProp==PDI)
1362            isolateCount--;
1363        else if(dirProp==B)
1364            isolateCount=0;
1365        if(level&UBIDI_LEVEL_OVERRIDE) {
1366            /* keep the override flag in levels[i] but adjust the flags */
1367            level&=~UBIDI_LEVEL_OVERRIDE;     /* make the range check below simpler */
1368            flags|=DIRPROP_FLAG_O(level);
1369        } else {
1370            /* set the flags */
1371            flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
1372        }
1373        if((level<GET_PARALEVEL(pBiDi, i) &&
1374            !((0==level)&&(dirProp==B))) ||
1375           (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
1376            /* level out of bounds */
1377            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1378            return UBIDI_LTR;
1379        }
1380    }
1381    if(flags&MASK_EMBEDDING)
1382        flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1383    /* determine if the text is mixed-directional or single-directional */
1384    pBiDi->flags=flags;
1385    return directionFromFlags(pBiDi);
1386}
1387
1388/******************************************************************
1389 The Properties state machine table
1390*******************************************************************
1391
1392 All table cells are 8 bits:
1393      bits 0..4:  next state
1394      bits 5..7:  action to perform (if > 0)
1395
1396 Cells may be of format "n" where n represents the next state
1397 (except for the rightmost column).
1398 Cells may also be of format "s(x,y)" where x represents an action
1399 to perform and y represents the next state.
1400
1401*******************************************************************
1402 Definitions and type for properties state table
1403*******************************************************************
1404*/
1405#define IMPTABPROPS_COLUMNS 16
1406#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
1407#define GET_STATEPROPS(cell) ((cell)&0x1f)
1408#define GET_ACTIONPROPS(cell) ((cell)>>5)
1409#define s(action, newState) ((uint8_t)(newState+(action<<5)))
1410
1411static const uint8_t groupProp[] =          /* dirProp regrouped */
1412{
1413/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
1414    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
1415};
1416enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
1417
1418/******************************************************************
1419
1420      PROPERTIES  STATE  TABLE
1421
1422 In table impTabProps,
1423      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1424      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1425      - the Res column is the reduced property assigned to a run
1426
1427 Action 1: process current run1, init new run1
1428        2: init new run2
1429        3: process run1, process run2, init new run1
1430        4: process run1, set run1=run2, init new run2
1431
1432 Notes:
1433  1) This table is used in resolveImplicitLevels().
1434  2) This table triggers actions when there is a change in the Bidi
1435     property of incoming characters (action 1).
1436  3) Most such property sequences are processed immediately (in
1437     fact, passed to processPropertySeq().
1438  4) However, numbers are assembled as one sequence. This means
1439     that undefined situations (like CS following digits, until
1440     it is known if the next char will be a digit) are held until
1441     following chars define them.
1442     Example: digits followed by CS, then comes another CS or ON;
1443              the digits will be processed, then the CS assigned
1444              as the start of an ON sequence (action 3).
1445  5) There are cases where more than one sequence must be
1446     processed, for instance digits followed by CS followed by L:
1447     the digits must be processed as one sequence, and the CS
1448     must be processed as an ON sequence, all this before starting
1449     assembling chars for the opening L sequence.
1450
1451
1452*/
1453static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1454{
1455/*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,   ENL ,   ENR , Res */
1456/* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,    18 ,    21 , DirProp_ON },
1457/* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),s(1,18),s(1,21),  DirProp_L },
1458/* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),s(1,18),s(1,21),  DirProp_R },
1459/* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,s(1,18),s(1,21),  DirProp_R },
1460/* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),    18 ,    21 , DirProp_EN },
1461/* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1462/* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),    18 ,    21 , DirProp_AN },
1463/* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1464/* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1465/* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),    18 ,    21 , DirProp_ON },
1466/*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),    18 ,    21 , DirProp_EN },
1467/*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),    18 ,    21 , DirProp_EN },
1468/*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1469/*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),    18 ,    21 , DirProp_AN },
1470/*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1471/*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_S },
1472/*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),s(1,18),s(1,21),  DirProp_S },
1473/*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_B },
1474/*18 ENL         */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19),    20 ,s(2,19),    18 ,    18 , s(1,3),    18 ,    21 ,  DirProp_L },
1475/*19 ENL+ES/CS   */ { s(3,1), s(3,2),    18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    19 , s(4,7), s(3,3),    18 ,    21 ,  DirProp_L },
1476/*20 ENL+ET      */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    20 , s(1,7),    20 ,    20 , s(1,3),    18 ,    21 ,  DirProp_L },
1477/*21 ENR         */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22),    23 ,s(2,22),    21 ,    21 , s(1,3),    18 ,    21 , DirProp_AN },
1478/*22 ENR+ES/CS   */ { s(3,1), s(3,2),    21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    22 , s(4,7), s(3,3),    18 ,    21 , DirProp_AN },
1479/*23 ENR+ET      */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    23 , s(1,7),    23 ,    23 , s(1,3),    18 ,    21 , DirProp_AN }
1480};
1481
1482/*  we must undef macro s because the levels tables have a different
1483 *  structure (4 bits for action and 4 bits for next state.
1484 */
1485#undef s
1486
1487/******************************************************************
1488 The levels state machine tables
1489*******************************************************************
1490
1491 All table cells are 8 bits:
1492      bits 0..3:  next state
1493      bits 4..7:  action to perform (if > 0)
1494
1495 Cells may be of format "n" where n represents the next state
1496 (except for the rightmost column).
1497 Cells may also be of format "s(x,y)" where x represents an action
1498 to perform and y represents the next state.
1499
1500 This format limits each table to 16 states each and to 15 actions.
1501
1502*******************************************************************
1503 Definitions and type for levels state tables
1504*******************************************************************
1505*/
1506#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
1507#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
1508#define GET_STATE(cell) ((cell)&0x0f)
1509#define GET_ACTION(cell) ((cell)>>4)
1510#define s(action, newState) ((uint8_t)(newState+(action<<4)))
1511
1512typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1513typedef uint8_t ImpAct[];
1514
1515/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1516 * instead of having a pair of ImpTab and a pair of ImpAct.
1517 */
1518typedef struct ImpTabPair {
1519    const void * pImpTab[2];
1520    const void * pImpAct[2];
1521} ImpTabPair;
1522
1523/******************************************************************
1524
1525      LEVELS  STATE  TABLES
1526
1527 In all levels state tables,
1528      - state 0 is the initial state
1529      - the Res column is the increment to add to the text level
1530        for this property sequence.
1531
1532 The impAct arrays for each table of a pair map the local action
1533 numbers of the table to the total list of actions. For instance,
1534 action 2 in a given table corresponds to the action number which
1535 appears in entry [2] of the impAct array for that table.
1536 The first entry of all impAct arrays must be 0.
1537
1538 Action 1: init conditional sequence
1539        2: prepend conditional sequence to current sequence
1540        3: set ON sequence to new level - 1
1541        4: init EN/AN/ON sequence
1542        5: fix EN/AN/ON sequence followed by R
1543        6: set previous level sequence to level 2
1544
1545 Notes:
1546  1) These tables are used in processPropertySeq(). The input
1547     is property sequences as determined by resolveImplicitLevels.
1548  2) Most such property sequences are processed immediately
1549     (levels are assigned).
1550  3) However, some sequences cannot be assigned a final level till
1551     one or more following sequences are received. For instance,
1552     ON following an R sequence within an even-level paragraph.
1553     If the following sequence is R, the ON sequence will be
1554     assigned basic run level+1, and so will the R sequence.
1555  4) S is generally handled like ON, since its level will be fixed
1556     to paragraph level in adjustWSLevels().
1557
1558*/
1559
1560static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
1561/*  In this table, conditional sequences receive the lower possible level
1562    until proven otherwise.
1563*/
1564{
1565/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1566/* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
1567/* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1568/* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
1569/* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
1570/* 4 : R+ON       */ {     0 , s(2,1), s(3,3), s(3,3),     4 ,     4 ,     0 ,  0 },
1571/* 5 : AN+ON      */ {     0 , s(2,1),     0 , s(3,2),     5 ,     5 ,     0 ,  0 }
1572};
1573static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
1574/*  In this table, conditional sequences receive the lower possible level
1575    until proven otherwise.
1576*/
1577{
1578/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1579/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1580/* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1581/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1582/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
1583/* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
1584/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
1585};
1586static const ImpAct impAct0 = {0,1,2,3,4};
1587static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1588                                           &impTabR_DEFAULT},
1589                                          {&impAct0, &impAct0}};
1590
1591static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
1592/*  In this table, conditional sequences receive the lower possible level
1593    until proven otherwise.
1594*/
1595{
1596/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1597/* 0 : init       */ {     0 ,     2 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1598/* 1 : L+EN/AN    */ {     0 , s(4,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1599/* 2 : R          */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  1 },
1600/* 3 : R+ON       */ {     0 , s(2,2), s(3,4), s(3,4),     3 ,     3 ,     0 ,  0 },
1601/* 4 : R+EN/AN    */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  2 }
1602};
1603static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1604                                                   &impTabR_DEFAULT},
1605                                                  {&impAct0, &impAct0}};
1606
1607static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1608/*  In this table, EN/AN+ON sequences receive levels as if associated with R
1609    until proven that there is L or sor/eor on both sides. AN is handled like EN.
1610*/
1611{
1612/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1613/* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1614/* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
1615/* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
1616/* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
1617/* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
1618/* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
1619};
1620static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1621/*  In this table, EN/AN+ON sequences receive levels as if associated with R
1622    until proven that there is L on both sides. AN is handled like EN.
1623*/
1624{
1625/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1626/* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1627/* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1628/* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
1629/* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
1630/* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
1631};
1632static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1633                        {&impTabL_GROUP_NUMBERS_WITH_R,
1634                         &impTabR_GROUP_NUMBERS_WITH_R},
1635                        {&impAct0, &impAct0}};
1636
1637
1638static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1639/*  This table is identical to the Default LTR table except that EN and AN are
1640    handled like L.
1641*/
1642{
1643/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1644/* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
1645/* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
1646/* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
1647/* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
1648/* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
1649/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
1650};
1651static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1652/*  This table is identical to the Default RTL table except that EN and AN are
1653    handled like L.
1654*/
1655{
1656/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1657/* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1658/* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
1659/* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1660/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
1661/* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
1662/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
1663};
1664static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1665                        {&impTabL_INVERSE_NUMBERS_AS_L,
1666                         &impTabR_INVERSE_NUMBERS_AS_L},
1667                        {&impAct0, &impAct0}};
1668
1669static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
1670/*  In this table, conditional sequences receive the lower possible level
1671    until proven otherwise.
1672*/
1673{
1674/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1675/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1676/* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
1677/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1678/* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
1679/* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
1680/* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
1681/* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
1682};
1683static const ImpAct impAct1 = {0,1,13,14};
1684/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1685 */
1686static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1687                        {&impTabL_DEFAULT,
1688                         &impTabR_INVERSE_LIKE_DIRECT},
1689                        {&impAct0, &impAct1}};
1690
1691static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1692/*  The case handled in this table is (visually):  R EN L
1693*/
1694{
1695/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1696/* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
1697/* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
1698/* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
1699/* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
1700/* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
1701/* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
1702/* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
1703};
1704static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1705/*  The cases handled in this table are (visually):  R EN L
1706                                                     R L AN L
1707*/
1708{
1709/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1710/* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1711/* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
1712/* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
1713/* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
1714/* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
1715/* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
1716/* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
1717};
1718static const ImpAct impAct2 = {0,1,2,5,6,7,8};
1719static const ImpAct impAct3 = {0,1,9,10,11,12};
1720static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1721                        {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1722                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1723                        {&impAct2, &impAct3}};
1724
1725static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1726                        {&impTabL_NUMBERS_SPECIAL,
1727                         &impTabR_INVERSE_LIKE_DIRECT},
1728                        {&impAct0, &impAct1}};
1729
1730static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1731/*  The case handled in this table is (visually):  R EN L
1732*/
1733{
1734/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1735/* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1736/* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
1737/* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
1738/* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
1739/* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
1740};
1741static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1742                        {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1743                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1744                        {&impAct2, &impAct3}};
1745
1746#undef s
1747
1748typedef struct {
1749    const ImpTab * pImpTab;             /* level table pointer          */
1750    const ImpAct * pImpAct;             /* action map array             */
1751    int32_t startON;                    /* start of ON sequence         */
1752    int32_t startL2EN;                  /* start of level 2 sequence    */
1753    int32_t lastStrongRTL;              /* index of last found R or AL  */
1754    int32_t state;                      /* current state                */
1755    int32_t runStart;                   /* start position of the run    */
1756    UBiDiLevel runLevel;                /* run level before implicit solving */
1757} LevState;
1758
1759/*------------------------------------------------------------------------*/
1760
1761static void
1762addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1763  /* param pos:     position where to insert
1764     param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1765  */
1766{
1767#define FIRSTALLOC  10
1768    Point point;
1769    InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1770
1771    if (pInsertPoints->capacity == 0)
1772    {
1773        pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
1774        if (pInsertPoints->points == NULL)
1775        {
1776            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1777            return;
1778        }
1779        pInsertPoints->capacity=FIRSTALLOC;
1780    }
1781    if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1782    {
1783        void * savePoints=pInsertPoints->points;
1784        pInsertPoints->points=uprv_realloc(pInsertPoints->points,
1785                                           pInsertPoints->capacity*2*sizeof(Point));
1786        if (pInsertPoints->points == NULL)
1787        {
1788            pInsertPoints->points=savePoints;
1789            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1790            return;
1791        }
1792        else  pInsertPoints->capacity*=2;
1793    }
1794    point.pos=pos;
1795    point.flag=flag;
1796    pInsertPoints->points[pInsertPoints->size]=point;
1797    pInsertPoints->size++;
1798#undef FIRSTALLOC
1799}
1800
1801static void
1802setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
1803{
1804    DirProp *dirProps=pBiDi->dirProps, dirProp;
1805    UBiDiLevel *levels=pBiDi->levels;
1806    int32_t isolateCount=0, k;
1807    for(k=start; k<limit; k++) {
1808        dirProp=dirProps[k];
1809        if(dirProp==PDI)
1810            isolateCount--;
1811        if(isolateCount==0)
1812            levels[k]=level;
1813        if(dirProp==LRI || dirProp==RLI)
1814            isolateCount++;
1815    }
1816}
1817
1818/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1819
1820/*
1821 * This implementation of the (Wn) rules applies all rules in one pass.
1822 * In order to do so, it needs a look-ahead of typically 1 character
1823 * (except for W5: sequences of ET) and keeps track of changes
1824 * in a rule Wp that affect a later Wq (p<q).
1825 *
1826 * The (Nn) and (In) rules are also performed in that same single loop,
1827 * but effectively one iteration behind for white space.
1828 *
1829 * Since all implicit rules are performed in one step, it is not necessary
1830 * to actually store the intermediate directional properties in dirProps[].
1831 */
1832
1833static void
1834processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1835                   int32_t start, int32_t limit) {
1836    uint8_t cell, oldStateSeq, actionSeq;
1837    const ImpTab * pImpTab=pLevState->pImpTab;
1838    const ImpAct * pImpAct=pLevState->pImpAct;
1839    UBiDiLevel * levels=pBiDi->levels;
1840    UBiDiLevel level, addLevel;
1841    InsertPoints * pInsertPoints;
1842    int32_t start0, k;
1843
1844    start0=start;                           /* save original start position */
1845    oldStateSeq=(uint8_t)pLevState->state;
1846    cell=(*pImpTab)[oldStateSeq][_prop];
1847    pLevState->state=GET_STATE(cell);       /* isolate the new state */
1848    actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1849    addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1850
1851    if(actionSeq) {
1852        switch(actionSeq) {
1853        case 1:                         /* init ON seq */
1854            pLevState->startON=start0;
1855            break;
1856
1857        case 2:                         /* prepend ON seq to current seq */
1858            start=pLevState->startON;
1859            break;
1860
1861        case 3:                         /* EN/AN after R+ON */
1862            level=pLevState->runLevel+1;
1863            setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1864            break;
1865
1866        case 4:                         /* EN/AN before R for NUMBERS_SPECIAL */
1867            level=pLevState->runLevel+2;
1868            setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
1869            break;
1870
1871        case 5:                         /* L or S after possible relevant EN/AN */
1872            /* check if we had EN after R/AL */
1873            if (pLevState->startL2EN >= 0) {
1874                addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1875            }
1876            pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
1877            /* check if we had any relevant EN/AN after R/AL */
1878            pInsertPoints=&(pBiDi->insertPoints);
1879            if ((pInsertPoints->capacity == 0) ||
1880                (pInsertPoints->size <= pInsertPoints->confirmed))
1881            {
1882                /* nothing, just clean up */
1883                pLevState->lastStrongRTL=-1;
1884                /* check if we have a pending conditional segment */
1885                level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1886                if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
1887                    start=pLevState->startON;   /* reset to basic run level */
1888                }
1889                if (_prop == DirProp_S)                /* add LRM before S */
1890                {
1891                    addPoint(pBiDi, start0, LRM_BEFORE);
1892                    pInsertPoints->confirmed=pInsertPoints->size;
1893                }
1894                break;
1895            }
1896            /* reset previous RTL cont to level for LTR text */
1897            for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1898            {
1899                /* reset odd level, leave runLevel+2 as is */
1900                levels[k]=(levels[k] - 2) & ~1;
1901            }
1902            /* mark insert points as confirmed */
1903            pInsertPoints->confirmed=pInsertPoints->size;
1904            pLevState->lastStrongRTL=-1;
1905            if (_prop == DirProp_S)            /* add LRM before S */
1906            {
1907                addPoint(pBiDi, start0, LRM_BEFORE);
1908                pInsertPoints->confirmed=pInsertPoints->size;
1909            }
1910            break;
1911
1912        case 6:                         /* R/AL after possible relevant EN/AN */
1913            /* just clean up */
1914            pInsertPoints=&(pBiDi->insertPoints);
1915            if (pInsertPoints->capacity > 0)
1916                /* remove all non confirmed insert points */
1917                pInsertPoints->size=pInsertPoints->confirmed;
1918            pLevState->startON=-1;
1919            pLevState->startL2EN=-1;
1920            pLevState->lastStrongRTL=limit - 1;
1921            break;
1922
1923        case 7:                         /* EN/AN after R/AL + possible cont */
1924            /* check for real AN */
1925            if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
1926                (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1927            {
1928                /* real AN */
1929                if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1930                {
1931                    /* just note the righmost digit as a strong RTL */
1932                    pLevState->lastStrongRTL=limit - 1;
1933                    break;
1934                }
1935                if (pLevState->startL2EN >= 0)  /* after EN, no AN */
1936                {
1937                    addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1938                    pLevState->startL2EN=-2;
1939                }
1940                /* note AN */
1941                addPoint(pBiDi, start0, LRM_BEFORE);
1942                break;
1943            }
1944            /* if first EN/AN after R/AL */
1945            if (pLevState->startL2EN == -1) {
1946                pLevState->startL2EN=start0;
1947            }
1948            break;
1949
1950        case 8:                         /* note location of latest R/AL */
1951            pLevState->lastStrongRTL=limit - 1;
1952            pLevState->startON=-1;
1953            break;
1954
1955        case 9:                         /* L after R+ON/EN/AN */
1956            /* include possible adjacent number on the left */
1957            for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1958            if(k>=0) {
1959                addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
1960                pInsertPoints=&(pBiDi->insertPoints);
1961                pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
1962            }
1963            pLevState->startON=start0;
1964            break;
1965
1966        case 10:                        /* AN after L */
1967            /* AN numbers between L text on both sides may be trouble. */
1968            /* tentatively bracket with LRMs; will be confirmed if followed by L */
1969            addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
1970            addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
1971            break;
1972
1973        case 11:                        /* R after L+ON/EN/AN */
1974            /* false alert, infirm LRMs around previous AN */
1975            pInsertPoints=&(pBiDi->insertPoints);
1976            pInsertPoints->size=pInsertPoints->confirmed;
1977            if (_prop == DirProp_S)            /* add RLM before S */
1978            {
1979                addPoint(pBiDi, start0, RLM_BEFORE);
1980                pInsertPoints->confirmed=pInsertPoints->size;
1981            }
1982            break;
1983
1984        case 12:                        /* L after L+ON/AN */
1985            level=pLevState->runLevel + addLevel;
1986            for(k=pLevState->startON; k<start0; k++) {
1987                if (levels[k]<level)
1988                    levels[k]=level;
1989            }
1990            pInsertPoints=&(pBiDi->insertPoints);
1991            pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
1992            pLevState->startON=start0;
1993            break;
1994
1995        case 13:                        /* L after L+ON+EN/AN/ON */
1996            level=pLevState->runLevel;
1997            for(k=start0-1; k>=pLevState->startON; k--) {
1998                if(levels[k]==level+3) {
1999                    while(levels[k]==level+3) {
2000                        levels[k--]-=2;
2001                    }
2002                    while(levels[k]==level) {
2003                        k--;
2004                    }
2005                }
2006                if(levels[k]==level+2) {
2007                    levels[k]=level;
2008                    continue;
2009                }
2010                levels[k]=level+1;
2011            }
2012            break;
2013
2014        case 14:                        /* R after L+ON+EN/AN/ON */
2015            level=pLevState->runLevel+1;
2016            for(k=start0-1; k>=pLevState->startON; k--) {
2017                if(levels[k]>level) {
2018                    levels[k]-=2;
2019                }
2020            }
2021            break;
2022
2023        default:                        /* we should never get here */
2024            U_ASSERT(FALSE);
2025            break;
2026        }
2027    }
2028    if((addLevel) || (start < start0)) {
2029        level=pLevState->runLevel + addLevel;
2030        if(start>=pLevState->runStart) {
2031            for(k=start; k<limit; k++) {
2032                levels[k]=level;
2033            }
2034        } else {
2035            setLevelsOutsideIsolates(pBiDi, start, limit, level);
2036        }
2037    }
2038}
2039
2040/**
2041 * Returns the directionality of the last strong character at the end of the prologue, if any.
2042 * Requires prologue!=null.
2043 */
2044static DirProp
2045lastL_R_AL(UBiDi *pBiDi) {
2046    const UChar *text=pBiDi->prologue;
2047    int32_t length=pBiDi->proLength;
2048    int32_t i;
2049    UChar32 uchar;
2050    DirProp dirProp;
2051    for(i=length; i>0; ) {
2052        /* i is decremented by U16_PREV */
2053        U16_PREV(text, 0, i, uchar);
2054        dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2055        if(dirProp==L) {
2056            return DirProp_L;
2057        }
2058        if(dirProp==R || dirProp==AL) {
2059            return DirProp_R;
2060        }
2061        if(dirProp==B) {
2062            return DirProp_ON;
2063        }
2064    }
2065    return DirProp_ON;
2066}
2067
2068/**
2069 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2070 * Requires epilogue!=null.
2071 */
2072static DirProp
2073firstL_R_AL_EN_AN(UBiDi *pBiDi) {
2074    const UChar *text=pBiDi->epilogue;
2075    int32_t length=pBiDi->epiLength;
2076    int32_t i;
2077    UChar32 uchar;
2078    DirProp dirProp;
2079    for(i=0; i<length; ) {
2080        /* i is incremented by U16_NEXT */
2081        U16_NEXT(text, i, length, uchar);
2082        dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2083        if(dirProp==L) {
2084            return DirProp_L;
2085        }
2086        if(dirProp==R || dirProp==AL) {
2087            return DirProp_R;
2088        }
2089        if(dirProp==EN) {
2090            return DirProp_EN;
2091        }
2092        if(dirProp==AN) {
2093            return DirProp_AN;
2094        }
2095    }
2096    return DirProp_ON;
2097}
2098
2099static void
2100resolveImplicitLevels(UBiDi *pBiDi,
2101                      int32_t start, int32_t limit,
2102                      DirProp sor, DirProp eor) {
2103    const DirProp *dirProps=pBiDi->dirProps;
2104    DirProp dirProp;
2105    LevState levState;
2106    int32_t i, start1, start2;
2107    uint16_t oldStateImp, stateImp, actionImp;
2108    uint8_t gprop, resProp, cell;
2109    UBool inverseRTL;
2110    DirProp nextStrongProp=R;
2111    int32_t nextStrongPos=-1;
2112
2113    /* check for RTL inverse BiDi mode */
2114    /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2115     * loop on the text characters from end to start.
2116     * This would need a different properties state table (at least different
2117     * actions) and different levels state tables (maybe very similar to the
2118     * LTR corresponding ones.
2119     */
2120    inverseRTL=(UBool)
2121        ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2122         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
2123          pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2124
2125    /* initialize for property and levels state tables */
2126    levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2127    levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2128    levState.runStart=start;
2129    levState.runLevel=pBiDi->levels[start];
2130    levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2131    levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
2132    if(start==0 && pBiDi->proLength>0) {
2133        DirProp lastStrong=lastL_R_AL(pBiDi);
2134        if(lastStrong!=DirProp_ON) {
2135            sor=lastStrong;
2136        }
2137    }
2138    /* The isolates[] entries contain enough information to
2139       resume the bidi algorithm in the same state as it was
2140       when it was interrupted by an isolate sequence. */
2141    if(dirProps[start]==PDI) {
2142        levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
2143        start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2144        stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2145        levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2146        pBiDi->isolateCount--;
2147    } else {
2148        levState.startON=-1;
2149        start1=start;
2150        if(dirProps[start]==NSM)
2151            stateImp = 1 + sor;
2152        else
2153            stateImp=0;
2154        levState.state=0;
2155        processPropertySeq(pBiDi, &levState, sor, start, start);
2156    }
2157    start2=start;                       /* to make Java compiler happy */
2158
2159    for(i=start; i<=limit; i++) {
2160        if(i>=limit) {
2161            int32_t k;
2162            for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--);
2163            dirProp=dirProps[k];
2164            if(dirProp==LRI || dirProp==RLI)
2165                break;      /* no forced closing for sequence ending with LRI/RLI */
2166            gprop=eor;
2167        } else {
2168            DirProp prop, prop1;
2169            prop=dirProps[i];
2170            if(inverseRTL) {
2171                if(prop==AL) {
2172                    /* AL before EN does not make it AN */
2173                    prop=R;
2174                } else if(prop==EN) {
2175                    if(nextStrongPos<=i) {
2176                        /* look for next strong char (L/R/AL) */
2177                        int32_t j;
2178                        nextStrongProp=R;   /* set default */
2179                        nextStrongPos=limit;
2180                        for(j=i+1; j<limit; j++) {
2181                            prop1=dirProps[j];
2182                            if(prop1==L || prop1==R || prop1==AL) {
2183                                nextStrongProp=prop1;
2184                                nextStrongPos=j;
2185                                break;
2186                            }
2187                        }
2188                    }
2189                    if(nextStrongProp==AL) {
2190                        prop=AN;
2191                    }
2192                }
2193            }
2194            gprop=groupProp[prop];
2195        }
2196        oldStateImp=stateImp;
2197        cell=impTabProps[oldStateImp][gprop];
2198        stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
2199        actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
2200        if((i==limit) && (actionImp==0)) {
2201            /* there is an unprocessed sequence if its property == eor   */
2202            actionImp=1;                    /* process the last sequence */
2203        }
2204        if(actionImp) {
2205            resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2206            switch(actionImp) {
2207            case 1:             /* process current seq1, init new seq1 */
2208                processPropertySeq(pBiDi, &levState, resProp, start1, i);
2209                start1=i;
2210                break;
2211            case 2:             /* init new seq2 */
2212                start2=i;
2213                break;
2214            case 3:             /* process seq1, process seq2, init new seq1 */
2215                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2216                processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2217                start1=i;
2218                break;
2219            case 4:             /* process seq1, set seq1=seq2, init new seq2 */
2220                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2221                start1=start2;
2222                start2=i;
2223                break;
2224            default:            /* we should never get here */
2225                U_ASSERT(FALSE);
2226                break;
2227            }
2228        }
2229    }
2230
2231    /* flush possible pending sequence, e.g. ON */
2232    if(limit==pBiDi->length && pBiDi->epiLength>0) {
2233        DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2234        if(firstStrong!=DirProp_ON) {
2235            eor=firstStrong;
2236        }
2237    }
2238
2239    /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2240    for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
2241    dirProp=dirProps[i];
2242    if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2243        pBiDi->isolateCount++;
2244        pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2245        pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2246        pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2247        pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
2248    }
2249    else
2250        processPropertySeq(pBiDi, &levState, eor, limit, limit);
2251}
2252
2253/* perform (L1) and (X9) ---------------------------------------------------- */
2254
2255/*
2256 * Reset the embedding levels for some non-graphic characters (L1).
2257 * This function also sets appropriate levels for BN, and
2258 * explicit embedding types that are supposed to have been removed
2259 * from the paragraph in (X9).
2260 */
2261static void
2262adjustWSLevels(UBiDi *pBiDi) {
2263    const DirProp *dirProps=pBiDi->dirProps;
2264    UBiDiLevel *levels=pBiDi->levels;
2265    int32_t i;
2266
2267    if(pBiDi->flags&MASK_WS) {
2268        UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2269        Flags flag;
2270
2271        i=pBiDi->trailingWSStart;
2272        while(i>0) {
2273            /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2274            while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
2275                if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2276                    levels[i]=0;
2277                } else {
2278                    levels[i]=GET_PARALEVEL(pBiDi, i);
2279                }
2280            }
2281
2282            /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2283            /* here, i+1 is guaranteed to be <length */
2284            while(i>0) {
2285                flag=DIRPROP_FLAG(dirProps[--i]);
2286                if(flag&MASK_BN_EXPLICIT) {
2287                    levels[i]=levels[i+1];
2288                } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2289                    levels[i]=0;
2290                    break;
2291                } else if(flag&MASK_B_S) {
2292                    levels[i]=GET_PARALEVEL(pBiDi, i);
2293                    break;
2294                }
2295            }
2296        }
2297    }
2298}
2299
2300U_CAPI void U_EXPORT2
2301ubidi_setContext(UBiDi *pBiDi,
2302                 const UChar *prologue, int32_t proLength,
2303                 const UChar *epilogue, int32_t epiLength,
2304                 UErrorCode *pErrorCode) {
2305    /* check the argument values */
2306    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2307    if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2308       (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2309        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2310        return;
2311    }
2312
2313    if(proLength==-1) {
2314        pBiDi->proLength=u_strlen(prologue);
2315    } else {
2316        pBiDi->proLength=proLength;
2317    }
2318    if(epiLength==-1) {
2319        pBiDi->epiLength=u_strlen(epilogue);
2320    } else {
2321        pBiDi->epiLength=epiLength;
2322    }
2323    pBiDi->prologue=prologue;
2324    pBiDi->epilogue=epilogue;
2325}
2326
2327static void
2328setParaSuccess(UBiDi *pBiDi) {
2329    pBiDi->proLength=0;                 /* forget the last context */
2330    pBiDi->epiLength=0;
2331    pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
2332}
2333
2334#define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
2335#define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
2336
2337static void
2338setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2339                UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
2340    void *runsOnlyMemory;
2341    int32_t *visualMap;
2342    UChar *visualText;
2343    int32_t saveLength, saveTrailingWSStart;
2344    const UBiDiLevel *levels;
2345    UBiDiLevel *saveLevels;
2346    UBiDiDirection saveDirection;
2347    UBool saveMayAllocateText;
2348    Run *runs;
2349    int32_t visualLength, i, j, visualStart, logicalStart,
2350            runCount, runLength, addedRuns, insertRemove,
2351            start, limit, step, indexOddBit, logicalPos,
2352            index0, index1;
2353    uint32_t saveOptions;
2354
2355    pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2356    if(length==0) {
2357        ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2358        goto cleanup3;
2359    }
2360    /* obtain memory for mapping table and visual text */
2361    runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
2362    if(runsOnlyMemory==NULL) {
2363        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2364        goto cleanup3;
2365    }
2366    visualMap=runsOnlyMemory;
2367    visualText=(UChar *)&visualMap[length];
2368    saveLevels=(UBiDiLevel *)&visualText[length];
2369    saveOptions=pBiDi->reorderingOptions;
2370    if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2371        pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2372        pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2373    }
2374    paraLevel&=1;                       /* accept only 0 or 1 */
2375    ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2376    if(U_FAILURE(*pErrorCode)) {
2377        goto cleanup3;
2378    }
2379    /* we cannot access directly pBiDi->levels since it is not yet set if
2380     * direction is not MIXED
2381     */
2382    levels=ubidi_getLevels(pBiDi, pErrorCode);
2383    uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
2384    saveTrailingWSStart=pBiDi->trailingWSStart;
2385    saveLength=pBiDi->length;
2386    saveDirection=pBiDi->direction;
2387
2388    /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2389     * the visual map and the dirProps array to drive the second call
2390     * to ubidi_setPara (but must make provision for possible removal of
2391     * BiDi controls.  Alternatively, only use the dirProps array via
2392     * customized classifier callback.
2393     */
2394    visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2395                                      UBIDI_DO_MIRRORING, pErrorCode);
2396    ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2397    if(U_FAILURE(*pErrorCode)) {
2398        goto cleanup2;
2399    }
2400    pBiDi->reorderingOptions=saveOptions;
2401
2402    pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2403    paraLevel^=1;
2404    /* Because what we did with reorderingOptions, visualText may be shorter
2405     * than the original text. But we don't want the levels memory to be
2406     * reallocated shorter than the original length, since we need to restore
2407     * the levels as after the first call to ubidi_setpara() before returning.
2408     * We will force mayAllocateText to FALSE before the second call to
2409     * ubidi_setpara(), and will restore it afterwards.
2410     */
2411    saveMayAllocateText=pBiDi->mayAllocateText;
2412    pBiDi->mayAllocateText=FALSE;
2413    ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
2414    pBiDi->mayAllocateText=saveMayAllocateText;
2415    ubidi_getRuns(pBiDi, pErrorCode);
2416    if(U_FAILURE(*pErrorCode)) {
2417        goto cleanup1;
2418    }
2419    /* check if some runs must be split, count how many splits */
2420    addedRuns=0;
2421    runCount=pBiDi->runCount;
2422    runs=pBiDi->runs;
2423    visualStart=0;
2424    for(i=0; i<runCount; i++, visualStart+=runLength) {
2425        runLength=runs[i].visualLimit-visualStart;
2426        if(runLength<2) {
2427            continue;
2428        }
2429        logicalStart=GET_INDEX(runs[i].logicalStart);
2430        for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2431            index0=visualMap[j];
2432            index1=visualMap[j-1];
2433            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2434                addedRuns++;
2435            }
2436        }
2437    }
2438    if(addedRuns) {
2439        if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2440            if(runCount==1) {
2441                /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2442                pBiDi->runsMemory[0]=runs[0];
2443            }
2444            runs=pBiDi->runs=pBiDi->runsMemory;
2445            pBiDi->runCount+=addedRuns;
2446        } else {
2447            goto cleanup1;
2448        }
2449    }
2450    /* split runs which are not consecutive in source text */
2451    for(i=runCount-1; i>=0; i--) {
2452        runLength= i==0 ? runs[0].visualLimit :
2453                          runs[i].visualLimit-runs[i-1].visualLimit;
2454        logicalStart=runs[i].logicalStart;
2455        indexOddBit=GET_ODD_BIT(logicalStart);
2456        logicalStart=GET_INDEX(logicalStart);
2457        if(runLength<2) {
2458            if(addedRuns) {
2459                runs[i+addedRuns]=runs[i];
2460            }
2461            logicalPos=visualMap[logicalStart];
2462            runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2463                                            saveLevels[logicalPos]^indexOddBit);
2464            continue;
2465        }
2466        if(indexOddBit) {
2467            start=logicalStart;
2468            limit=logicalStart+runLength-1;
2469            step=1;
2470        } else {
2471            start=logicalStart+runLength-1;
2472            limit=logicalStart;
2473            step=-1;
2474        }
2475        for(j=start; j!=limit; j+=step) {
2476            index0=visualMap[j];
2477            index1=visualMap[j+step];
2478            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2479                logicalPos=BIDI_MIN(visualMap[start], index0);
2480                runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2481                                            saveLevels[logicalPos]^indexOddBit);
2482                runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2483                runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2484                insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2485                runs[i+addedRuns].insertRemove=insertRemove;
2486                runs[i].insertRemove&=~insertRemove;
2487                start=j+step;
2488                addedRuns--;
2489            }
2490        }
2491        if(addedRuns) {
2492            runs[i+addedRuns]=runs[i];
2493        }
2494        logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2495        runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2496                                            saveLevels[logicalPos]^indexOddBit);
2497    }
2498
2499  cleanup1:
2500    /* restore initial paraLevel */
2501    pBiDi->paraLevel^=1;
2502  cleanup2:
2503    /* restore real text */
2504    pBiDi->text=text;
2505    pBiDi->length=saveLength;
2506    pBiDi->originalLength=length;
2507    pBiDi->direction=saveDirection;
2508    /* the saved levels should never excess levelsSize, but we check anyway */
2509    if(saveLength>pBiDi->levelsSize) {
2510        saveLength=pBiDi->levelsSize;
2511    }
2512    uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
2513    pBiDi->trailingWSStart=saveTrailingWSStart;
2514    /* free memory for mapping table and visual text */
2515    uprv_free(runsOnlyMemory);
2516    if(pBiDi->runCount>1) {
2517        pBiDi->direction=UBIDI_MIXED;
2518    }
2519  cleanup3:
2520    pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2521}
2522
2523/* ubidi_setPara ------------------------------------------------------------ */
2524
2525U_CAPI void U_EXPORT2
2526ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2527              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2528              UErrorCode *pErrorCode) {
2529    UBiDiDirection direction;
2530    DirProp *dirProps;
2531
2532    /* check the argument values */
2533    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2534    if(pBiDi==NULL || text==NULL || length<-1 ||
2535       (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2536        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2537        return;
2538    }
2539
2540    if(length==-1) {
2541        length=u_strlen(text);
2542    }
2543
2544    /* special treatment for RUNS_ONLY mode */
2545    if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2546        setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2547        return;
2548    }
2549
2550    /* initialize the UBiDi structure */
2551    pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
2552    pBiDi->text=text;
2553    pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2554    pBiDi->paraLevel=paraLevel;
2555    pBiDi->direction=paraLevel&1;
2556    pBiDi->paraCount=1;
2557
2558    pBiDi->dirProps=NULL;
2559    pBiDi->levels=NULL;
2560    pBiDi->runs=NULL;
2561    pBiDi->insertPoints.size=0;         /* clean up from last call */
2562    pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
2563
2564    /*
2565     * Save the original paraLevel if contextual; otherwise, set to 0.
2566     */
2567    pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2568
2569    if(length==0) {
2570        /*
2571         * For an empty paragraph, create a UBiDi object with the paraLevel and
2572         * the flags and the direction set but without allocating zero-length arrays.
2573         * There is nothing more to do.
2574         */
2575        if(IS_DEFAULT_LEVEL(paraLevel)) {
2576            pBiDi->paraLevel&=1;
2577            pBiDi->defaultParaLevel=0;
2578        }
2579        pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2580        pBiDi->runCount=0;
2581        pBiDi->paraCount=0;
2582        setParaSuccess(pBiDi);          /* mark successful setPara */
2583        return;
2584    }
2585
2586    pBiDi->runCount=-1;
2587
2588    /* allocate paras memory */
2589    if(pBiDi->parasMemory)
2590        pBiDi->paras=pBiDi->parasMemory;
2591    else
2592        pBiDi->paras=pBiDi->simpleParas;
2593
2594    /*
2595     * Get the directional properties,
2596     * the flags bit-set, and
2597     * determine the paragraph level if necessary.
2598     */
2599    if(getDirPropsMemory(pBiDi, length)) {
2600        pBiDi->dirProps=pBiDi->dirPropsMemory;
2601        if(!getDirProps(pBiDi)) {
2602            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2603            return;
2604        }
2605    } else {
2606        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2607        return;
2608    }
2609    dirProps=pBiDi->dirProps;
2610    /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2611    length= pBiDi->length;
2612    pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
2613
2614    /* are explicit levels specified? */
2615    if(embeddingLevels==NULL) {
2616        /* no: determine explicit levels according to the (Xn) rules */\
2617        if(getLevelsMemory(pBiDi, length)) {
2618            pBiDi->levels=pBiDi->levelsMemory;
2619            direction=resolveExplicitLevels(pBiDi, pErrorCode);
2620            if(U_FAILURE(*pErrorCode)) {
2621                return;
2622            }
2623        } else {
2624            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2625            return;
2626        }
2627    } else {
2628        /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2629        pBiDi->levels=embeddingLevels;
2630        direction=checkExplicitLevels(pBiDi, pErrorCode);
2631        if(U_FAILURE(*pErrorCode)) {
2632            return;
2633        }
2634    }
2635
2636    /* allocate isolate memory */
2637    if(pBiDi->isolateCount<=SIMPLE_ISOLATES_SIZE)
2638        pBiDi->isolates=pBiDi->simpleIsolates;
2639    else
2640        if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
2641            pBiDi->isolates=pBiDi->isolatesMemory;
2642        else {
2643            if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2644                pBiDi->isolates=pBiDi->isolatesMemory;
2645            } else {
2646                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2647                return;
2648            }
2649        }
2650    pBiDi->isolateCount=-1;             /* current isolates stack entry == none */
2651
2652    /*
2653     * The steps after (X9) in the UBiDi algorithm are performed only if
2654     * the paragraph text has mixed directionality!
2655     */
2656    pBiDi->direction=direction;
2657    switch(direction) {
2658    case UBIDI_LTR:
2659        /* make sure paraLevel is even */
2660        pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
2661
2662        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2663        pBiDi->trailingWSStart=0;
2664        break;
2665    case UBIDI_RTL:
2666        /* make sure paraLevel is odd */
2667        pBiDi->paraLevel|=1;
2668
2669        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2670        pBiDi->trailingWSStart=0;
2671        break;
2672    default:
2673        /*
2674         *  Choose the right implicit state table
2675         */
2676        switch(pBiDi->reorderingMode) {
2677        case UBIDI_REORDER_DEFAULT:
2678            pBiDi->pImpTabPair=&impTab_DEFAULT;
2679            break;
2680        case UBIDI_REORDER_NUMBERS_SPECIAL:
2681            pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
2682            break;
2683        case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
2684            pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
2685            break;
2686        case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
2687            pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
2688            break;
2689        case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
2690            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2691                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
2692            } else {
2693                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
2694            }
2695            break;
2696        case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
2697            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2698                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
2699            } else {
2700                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
2701            }
2702            break;
2703        default:
2704            /* we should never get here */
2705            U_ASSERT(FALSE);
2706            break;
2707        }
2708        /*
2709         * If there are no external levels specified and there
2710         * are no significant explicit level codes in the text,
2711         * then we can treat the entire paragraph as one run.
2712         * Otherwise, we need to perform the following rules on runs of
2713         * the text with the same embedding levels. (X10)
2714         * "Significant" explicit level codes are ones that actually
2715         * affect non-BN characters.
2716         * Examples for "insignificant" ones are empty embeddings
2717         * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2718         */
2719        if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
2720                                   !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
2721            resolveImplicitLevels(pBiDi, 0, length,
2722                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
2723                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
2724        } else {
2725            /* sor, eor: start and end types of same-level-run */
2726            UBiDiLevel *levels=pBiDi->levels;
2727            int32_t start, limit=0;
2728            UBiDiLevel level, nextLevel;
2729            DirProp sor, eor;
2730
2731            /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2732            level=GET_PARALEVEL(pBiDi, 0);
2733            nextLevel=levels[0];
2734            if(level<nextLevel) {
2735                eor=GET_LR_FROM_LEVEL(nextLevel);
2736            } else {
2737                eor=GET_LR_FROM_LEVEL(level);
2738            }
2739
2740            do {
2741                /* determine start and limit of the run (end points just behind the run) */
2742
2743                /* the values for this run's start are the same as for the previous run's end */
2744                start=limit;
2745                level=nextLevel;
2746                if((start>0) && (dirProps[start-1]==B)) {
2747                    /* except if this is a new paragraph, then set sor = para level */
2748                    sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
2749                } else {
2750                    sor=eor;
2751                }
2752
2753                /* search for the limit of this run */
2754                while((++limit<length) &&
2755                      ((levels[limit]==level) ||
2756                       (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
2757
2758                /* get the correct level of the next run */
2759                if(limit<length) {
2760                    nextLevel=levels[limit];
2761                } else {
2762                    nextLevel=GET_PARALEVEL(pBiDi, length-1);
2763                }
2764
2765                /* determine eor from max(level, nextLevel); sor is last run's eor */
2766                if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
2767                    eor=GET_LR_FROM_LEVEL(nextLevel);
2768                } else {
2769                    eor=GET_LR_FROM_LEVEL(level);
2770                }
2771
2772                /* if the run consists of overridden directional types, then there
2773                   are no implicit types to be resolved */
2774                if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2775                    resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2776                } else {
2777                    /* remove the UBIDI_LEVEL_OVERRIDE flags */
2778                    do {
2779                        levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2780                    } while(start<limit);
2781                }
2782            } while(limit<length);
2783        }
2784        /* check if we got any memory shortage while adding insert points */
2785        if (U_FAILURE(pBiDi->insertPoints.errorCode))
2786        {
2787            *pErrorCode=pBiDi->insertPoints.errorCode;
2788            return;
2789        }
2790        /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2791        adjustWSLevels(pBiDi);
2792        break;
2793    }
2794    /* add RLM for inverse Bidi with contextual orientation resolving
2795     * to RTL which would not round-trip otherwise
2796     */
2797    if((pBiDi->defaultParaLevel>0) &&
2798       (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2799       ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2800        (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2801        int32_t i, j, start, last;
2802        UBiDiLevel level;
2803        DirProp dirProp;
2804        for(i=0; i<pBiDi->paraCount; i++) {
2805            last=(pBiDi->paras[i].limit)-1;
2806            level=pBiDi->paras[i].level;
2807            if(level==0)
2808                continue;           /* LTR paragraph */
2809            start= i==0 ? 0 : pBiDi->paras[i-1].limit;
2810            for(j=last; j>=start; j--) {
2811                dirProp=dirProps[j];
2812                if(dirProp==L) {
2813                    if(j<last) {
2814                        while(dirProps[last]==B) {
2815                            last--;
2816                        }
2817                    }
2818                    addPoint(pBiDi, last, RLM_BEFORE);
2819                    break;
2820                }
2821                if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2822                    break;
2823                }
2824            }
2825        }
2826    }
2827
2828    if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2829        pBiDi->resultLength -= pBiDi->controlCount;
2830    } else {
2831        pBiDi->resultLength += pBiDi->insertPoints.size;
2832    }
2833    setParaSuccess(pBiDi);              /* mark successful setPara */
2834}
2835
2836U_CAPI void U_EXPORT2
2837ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2838    if(pBiDi!=NULL) {
2839        pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2840    }
2841}
2842
2843U_CAPI UBool U_EXPORT2
2844ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2845    if(pBiDi!=NULL) {
2846        return pBiDi->orderParagraphsLTR;
2847    } else {
2848        return FALSE;
2849    }
2850}
2851
2852U_CAPI UBiDiDirection U_EXPORT2
2853ubidi_getDirection(const UBiDi *pBiDi) {
2854    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2855        return pBiDi->direction;
2856    } else {
2857        return UBIDI_LTR;
2858    }
2859}
2860
2861U_CAPI const UChar * U_EXPORT2
2862ubidi_getText(const UBiDi *pBiDi) {
2863    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2864        return pBiDi->text;
2865    } else {
2866        return NULL;
2867    }
2868}
2869
2870U_CAPI int32_t U_EXPORT2
2871ubidi_getLength(const UBiDi *pBiDi) {
2872    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2873        return pBiDi->originalLength;
2874    } else {
2875        return 0;
2876    }
2877}
2878
2879U_CAPI int32_t U_EXPORT2
2880ubidi_getProcessedLength(const UBiDi *pBiDi) {
2881    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2882        return pBiDi->length;
2883    } else {
2884        return 0;
2885    }
2886}
2887
2888U_CAPI int32_t U_EXPORT2
2889ubidi_getResultLength(const UBiDi *pBiDi) {
2890    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2891        return pBiDi->resultLength;
2892    } else {
2893        return 0;
2894    }
2895}
2896
2897/* paragraphs API functions ------------------------------------------------- */
2898
2899U_CAPI UBiDiLevel U_EXPORT2
2900ubidi_getParaLevel(const UBiDi *pBiDi) {
2901    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2902        return pBiDi->paraLevel;
2903    } else {
2904        return 0;
2905    }
2906}
2907
2908U_CAPI int32_t U_EXPORT2
2909ubidi_countParagraphs(UBiDi *pBiDi) {
2910    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2911        return 0;
2912    } else {
2913        return pBiDi->paraCount;
2914    }
2915}
2916
2917U_CAPI void U_EXPORT2
2918ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2919                          int32_t *pParaStart, int32_t *pParaLimit,
2920                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2921    int32_t paraStart;
2922
2923    /* check the argument values */
2924    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2925    RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2926    RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2927
2928    pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2929    if(paraIndex) {
2930        paraStart=pBiDi->paras[paraIndex-1].limit;
2931    } else {
2932        paraStart=0;
2933    }
2934    if(pParaStart!=NULL) {
2935        *pParaStart=paraStart;
2936    }
2937    if(pParaLimit!=NULL) {
2938        *pParaLimit=pBiDi->paras[paraIndex].limit;
2939    }
2940    if(pParaLevel!=NULL) {
2941        *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2942    }
2943}
2944
2945U_CAPI int32_t U_EXPORT2
2946ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2947                          int32_t *pParaStart, int32_t *pParaLimit,
2948                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2949    int32_t paraIndex;
2950
2951    /* check the argument values */
2952    /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2953    RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2954    RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2955    pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2956    RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2957
2958    for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
2959    ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2960    return paraIndex;
2961}
2962
2963U_CAPI void U_EXPORT2
2964ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2965                       const void *newContext, UBiDiClassCallback **oldFn,
2966                       const void **oldContext, UErrorCode *pErrorCode)
2967{
2968    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2969    if(pBiDi==NULL) {
2970        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2971        return;
2972    }
2973    if( oldFn )
2974    {
2975        *oldFn = pBiDi->fnClassCallback;
2976    }
2977    if( oldContext )
2978    {
2979        *oldContext = pBiDi->coClassCallback;
2980    }
2981    pBiDi->fnClassCallback = newFn;
2982    pBiDi->coClassCallback = newContext;
2983}
2984
2985U_CAPI void U_EXPORT2
2986ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
2987{
2988    if(pBiDi==NULL) {
2989        return;
2990    }
2991    if( fn )
2992    {
2993        *fn = pBiDi->fnClassCallback;
2994    }
2995    if( context )
2996    {
2997        *context = pBiDi->coClassCallback;
2998    }
2999}
3000
3001U_CAPI UCharDirection U_EXPORT2
3002ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
3003{
3004    UCharDirection dir;
3005
3006    if( pBiDi->fnClassCallback == NULL ||
3007        (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
3008    {
3009        dir = ubidi_getClass(pBiDi->bdp, c);
3010    }
3011    if(dir >= U_CHAR_DIRECTION_COUNT) {
3012        dir = ON;
3013    }
3014    return dir;
3015}
3016