1/*
2******************************************************************************
3*
4*   Copyright (C) 1999-2013, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  ubidi.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 1999jul27
14*   created by: Markus W. Scherer, updated by Matitiahu Allouche
15*
16*/
17
18#include "cmemory.h"
19#include "unicode/utypes.h"
20#include "unicode/ustring.h"
21#include "unicode/uchar.h"
22#include "unicode/ubidi.h"
23#include "unicode/utf16.h"
24#include "ubidi_props.h"
25#include "ubidiimp.h"
26#include "uassert.h"
27
28/*
29 * General implementation notes:
30 *
31 * Throughout the implementation, there are comments like (W2) that refer to
32 * rules of the BiDi algorithm in its version 5, in this example to the second
33 * rule of the resolution of weak types.
34 *
35 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
36 * character according to UTF-16, the second UChar gets the directional property of
37 * the entire character assigned, while the first one gets a BN, a boundary
38 * neutral, type, which is ignored by most of the algorithm according to
39 * rule (X9) and the implementation suggestions of the BiDi algorithm.
40 *
41 * Later, adjustWSLevels() will set the level for each BN to that of the
42 * following character (UChar), which results in surrogate pairs getting the
43 * same level on each of their surrogates.
44 *
45 * In a UTF-8 implementation, the same thing could be done: the last byte of
46 * a multi-byte sequence would get the "real" property, while all previous
47 * bytes of that sequence would get BN.
48 *
49 * It is not possible to assign all those parts of a character the same real
50 * property because this would fail in the resolution of weak types with rules
51 * that look at immediately surrounding types.
52 *
53 * As a related topic, this implementation does not remove Boundary Neutral
54 * types from the input, but ignores them wherever this is relevant.
55 * For example, the loop for the resolution of the weak types reads
56 * types until it finds a non-BN.
57 * Also, explicit embedding codes are neither changed into BN nor removed.
58 * They are only treated the same way real BNs are.
59 * As stated before, adjustWSLevels() takes care of them at the end.
60 * For the purpose of conformance, the levels of all these codes
61 * do not matter.
62 *
63 * Note that this implementation never modifies the dirProps
64 * after the initial setup, except for FSI which is changed to either
65 * LRI or RLI in getDirProps(), and paired brackets which may be changed
66 * to L or R according to N0.
67 *
68 *
69 * In this implementation, the resolution of weak types (Wn),
70 * neutrals (Nn), and the assignment of the resolved level (In)
71 * are all done in one single loop, in resolveImplicitLevels().
72 * Changes of dirProp values are done on the fly, without writing
73 * them back to the dirProps array.
74 *
75 *
76 * This implementation contains code that allows to bypass steps of the
77 * algorithm that are not needed on the specific paragraph
78 * in order to speed up the most common cases considerably,
79 * like text that is entirely LTR, or RTL text without numbers.
80 *
81 * Most of this is done by setting a bit for each directional property
82 * in a flags variable and later checking for whether there are
83 * any LTR characters or any RTL characters, or both, whether
84 * there are any explicit embedding codes, etc.
85 *
86 * If the (Xn) steps are performed, then the flags are re-evaluated,
87 * because they will then not contain the embedding codes any more
88 * and will be adjusted for override codes, so that subsequently
89 * more bypassing may be possible than what the initial flags suggested.
90 *
91 * If the text is not mixed-directional, then the
92 * algorithm steps for the weak type resolution are not performed,
93 * and all levels are set to the paragraph level.
94 *
95 * If there are no explicit embedding codes, then the (Xn) steps
96 * are not performed.
97 *
98 * If embedding levels are supplied as a parameter, then all
99 * explicit embedding codes are ignored, and the (Xn) steps
100 * are not performed.
101 *
102 * White Space types could get the level of the run they belong to,
103 * and are checked with a test of (flags&MASK_EMBEDDING) to
104 * consider if the paragraph direction should be considered in
105 * the flags variable.
106 *
107 * If there are no White Space types in the paragraph, then
108 * (L1) is not necessary in adjustWSLevels().
109 */
110
111/* to avoid some conditional statements, use tiny constant arrays */
112static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
113static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
114static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
115
116#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
117#define DIRPROP_FLAG_E(level) flagE[(level)&1]
118#define DIRPROP_FLAG_O(level) flagO[(level)&1]
119
120#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
121
122/* UBiDi object management -------------------------------------------------- */
123
124U_CAPI UBiDi * U_EXPORT2
125ubidi_open(void)
126{
127    UErrorCode errorCode=U_ZERO_ERROR;
128    return ubidi_openSized(0, 0, &errorCode);
129}
130
131U_CAPI UBiDi * U_EXPORT2
132ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
133    UBiDi *pBiDi;
134
135    /* check the argument values */
136    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
137        return NULL;
138    } else if(maxLength<0 || maxRunCount<0) {
139        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
140        return NULL;    /* invalid arguments */
141    }
142
143    /* allocate memory for the object */
144    pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
145    if(pBiDi==NULL) {
146        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
147        return NULL;
148    }
149
150    /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
151    uprv_memset(pBiDi, 0, sizeof(UBiDi));
152
153    /* get BiDi properties */
154    pBiDi->bdp=ubidi_getSingleton();
155
156    /* allocate memory for arrays as requested */
157    if(maxLength>0) {
158        if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
159            !getInitialLevelsMemory(pBiDi, maxLength)
160        ) {
161            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
162        }
163    } else {
164        pBiDi->mayAllocateText=TRUE;
165    }
166
167    if(maxRunCount>0) {
168        if(maxRunCount==1) {
169            /* use simpleRuns[] */
170            pBiDi->runsSize=sizeof(Run);
171        } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
172            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
173        }
174    } else {
175        pBiDi->mayAllocateRuns=TRUE;
176    }
177
178    if(U_SUCCESS(*pErrorCode)) {
179        return pBiDi;
180    } else {
181        ubidi_close(pBiDi);
182        return NULL;
183    }
184}
185
186/*
187 * We are allowed to allocate memory if memory==NULL or
188 * mayAllocate==TRUE for each array that we need.
189 * We also try to grow memory as needed if we
190 * allocate it.
191 *
192 * Assume sizeNeeded>0.
193 * If *pMemory!=NULL, then assume *pSize>0.
194 *
195 * ### this realloc() may unnecessarily copy the old data,
196 * which we know we don't need any more;
197 * is this the best way to do this??
198 */
199U_CFUNC UBool
200ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
201    void **pMemory = (void **)bidiMem;
202    /* check for existing memory */
203    if(*pMemory==NULL) {
204        /* we need to allocate memory */
205        if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
206            *pSize=sizeNeeded;
207            return TRUE;
208        } else {
209            return FALSE;
210        }
211    } else {
212        if(sizeNeeded<=*pSize) {
213            /* there is already enough memory */
214            return TRUE;
215        }
216        else if(!mayAllocate) {
217            /* not enough memory, and we must not allocate */
218            return FALSE;
219        } else {
220            /* we try to grow */
221            void *memory;
222            /* in most cases, we do not need the copy-old-data part of
223             * realloc, but it is needed when adding runs using getRunsMemory()
224             * in setParaRunsOnly()
225             */
226            if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
227                *pMemory=memory;
228                *pSize=sizeNeeded;
229                return TRUE;
230            } else {
231                /* we failed to grow */
232                return FALSE;
233            }
234        }
235    }
236}
237
238U_CAPI void U_EXPORT2
239ubidi_close(UBiDi *pBiDi) {
240    if(pBiDi!=NULL) {
241        pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */
242        if(pBiDi->dirPropsMemory!=NULL) {
243            uprv_free(pBiDi->dirPropsMemory);
244        }
245        if(pBiDi->levelsMemory!=NULL) {
246            uprv_free(pBiDi->levelsMemory);
247        }
248        if(pBiDi->openingsMemory!=NULL) {
249            uprv_free(pBiDi->openingsMemory);
250        }
251        if(pBiDi->parasMemory!=NULL) {
252            uprv_free(pBiDi->parasMemory);
253        }
254        if(pBiDi->runsMemory!=NULL) {
255            uprv_free(pBiDi->runsMemory);
256        }
257        if(pBiDi->isolatesMemory!=NULL) {
258            uprv_free(pBiDi->isolatesMemory);
259        }
260        if(pBiDi->insertPoints.points!=NULL) {
261            uprv_free(pBiDi->insertPoints.points);
262        }
263
264        uprv_free(pBiDi);
265    }
266}
267
268/* set to approximate "inverse BiDi" ---------------------------------------- */
269
270U_CAPI void U_EXPORT2
271ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
272    if(pBiDi!=NULL) {
273        pBiDi->isInverse=isInverse;
274        pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
275                                          : UBIDI_REORDER_DEFAULT;
276    }
277}
278
279U_CAPI UBool U_EXPORT2
280ubidi_isInverse(UBiDi *pBiDi) {
281    if(pBiDi!=NULL) {
282        return pBiDi->isInverse;
283    } else {
284        return FALSE;
285    }
286}
287
288/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
289 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
290 * concept of RUNS_ONLY which is a double operation.
291 * It could be advantageous to divide this into 3 concepts:
292 * a) Operation: direct / inverse / RUNS_ONLY
293 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
294 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
295 * This would allow combinations not possible today like RUNS_ONLY with
296 * NUMBERS_SPECIAL.
297 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
298 * REMOVE_CONTROLS for the inverse step.
299 * Not all combinations would be supported, and probably not all do make sense.
300 * This would need to document which ones are supported and what are the
301 * fallbacks for unsupported combinations.
302 */
303U_CAPI void U_EXPORT2
304ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
305    if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
306                        && (reorderingMode < UBIDI_REORDER_COUNT)) {
307        pBiDi->reorderingMode = reorderingMode;
308        pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
309    }
310}
311
312U_CAPI UBiDiReorderingMode U_EXPORT2
313ubidi_getReorderingMode(UBiDi *pBiDi) {
314    if (pBiDi!=NULL) {
315        return pBiDi->reorderingMode;
316    } else {
317        return UBIDI_REORDER_DEFAULT;
318    }
319}
320
321U_CAPI void U_EXPORT2
322ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
323    if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
324        reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
325    }
326    if (pBiDi!=NULL) {
327        pBiDi->reorderingOptions=reorderingOptions;
328    }
329}
330
331U_CAPI uint32_t U_EXPORT2
332ubidi_getReorderingOptions(UBiDi *pBiDi) {
333    if (pBiDi!=NULL) {
334        return pBiDi->reorderingOptions;
335    } else {
336        return 0;
337    }
338}
339
340U_CAPI UBiDiDirection U_EXPORT2
341ubidi_getBaseDirection(const UChar *text,
342int32_t length){
343
344    int32_t i;
345    UChar32 uchar;
346    UCharDirection dir;
347
348    if( text==NULL || length<-1 ){
349        return UBIDI_NEUTRAL;
350    }
351
352    if(length==-1) {
353        length=u_strlen(text);
354    }
355
356    for( i = 0 ; i < length; ) {
357        /* i is incremented by U16_NEXT */
358        U16_NEXT(text, i, length, uchar);
359        dir = u_charDirection(uchar);
360        if( dir == U_LEFT_TO_RIGHT )
361                return UBIDI_LTR;
362        if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
363                return UBIDI_RTL;
364    }
365    return UBIDI_NEUTRAL;
366}
367
368/* perform (P2)..(P3) ------------------------------------------------------- */
369
370/**
371 * Returns the directionality of the first strong character
372 * after the last B in prologue, if any.
373 * Requires prologue!=null.
374 */
375static DirProp
376firstL_R_AL(UBiDi *pBiDi) {
377    const UChar *text=pBiDi->prologue;
378    int32_t length=pBiDi->proLength;
379    int32_t i;
380    UChar32 uchar;
381    DirProp dirProp, result=ON;
382    for(i=0; i<length; ) {
383        /* i is incremented by U16_NEXT */
384        U16_NEXT(text, i, length, uchar);
385        dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
386        if(result==ON) {
387            if(dirProp==L || dirProp==R || dirProp==AL) {
388                result=dirProp;
389            }
390        } else {
391            if(dirProp==B) {
392                result=ON;
393            }
394        }
395    }
396    return result;
397}
398
399/*
400 * Check that there are enough entries in the array pointed to by pBiDi->paras
401 */
402static UBool
403checkParaCount(UBiDi *pBiDi) {
404    int32_t count=pBiDi->paraCount;
405    if(pBiDi->paras==pBiDi->simpleParas) {
406        if(count<=SIMPLE_PARAS_SIZE)
407            return TRUE;
408        if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_SIZE * 2))
409            return FALSE;
410        pBiDi->paras=pBiDi->parasMemory;
411        uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_SIZE * sizeof(Para));
412        return TRUE;
413    }
414    if(!getInitialParasMemory(pBiDi, count * 2))
415        return FALSE;
416    pBiDi->paras=pBiDi->parasMemory;
417    return TRUE;
418}
419
420/*
421 * Get the directional properties for the text, calculate the flags bit-set, and
422 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
423 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
424 */
425static UBool
426getDirProps(UBiDi *pBiDi) {
427    const UChar *text=pBiDi->text;
428    DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
429
430    int32_t i=0, originalLength=pBiDi->originalLength;
431    Flags flags=0;      /* collect all directionalities in the text */
432    UChar32 uchar;
433    DirProp dirProp=0, defaultParaLevel=0;  /* initialize to avoid compiler warnings */
434    UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
435    /* for inverse BiDi, the default para level is set to RTL if there is a
436       strong R or AL character at either end of the text                            */
437    UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
438            (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
439             pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
440    int32_t lastArabicPos=-1;
441    int32_t controlCount=0;
442    UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
443                                       UBIDI_OPTION_REMOVE_CONTROLS);
444
445    typedef enum {
446         NOT_SEEKING_STRONG,            /* 0: not contextual paraLevel, not after FSI */
447         SEEKING_STRONG_FOR_PARA,       /* 1: looking for first strong char in para */
448         SEEKING_STRONG_FOR_FSI,        /* 2: looking for first strong after FSI */
449         LOOKING_FOR_PDI                /* 3: found strong after FSI, looking for PDI */
450    } State;
451    State state;
452    DirProp lastStrong=ON;              /* for default level & inverse BiDi */
453    /* The following stacks are used to manage isolate sequences. Those
454       sequences may be nested, but obviously never more deeply than the
455       maximum explicit embedding level.
456       lastStack is the index of the last used entry in the stack. A value of -1
457       means that there is no open isolate sequence.
458       lastStack is reset to -1 on paragraph boundaries. */
459    /* The following stack contains the position of the initiator of
460       each open isolate sequence */
461    int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
462    /* The following stack contains the last known state before
463       encountering the initiator of an isolate sequence */
464    int8_t  previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
465    int32_t stackLast=-1;
466
467    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
468        pBiDi->length=0;
469    defaultParaLevel=pBiDi->paraLevel&1;
470    if(isDefaultLevel) {
471        pBiDi->paras[0].level=defaultParaLevel;
472        lastStrong=defaultParaLevel;
473        if(pBiDi->proLength>0 &&                    /* there is a prologue */
474           (dirProp=firstL_R_AL(pBiDi))!=ON) {  /* with a strong character */
475            if(dirProp==L)
476                pBiDi->paras[0].level=0;    /* set the default para level */
477            else
478                pBiDi->paras[0].level=1;    /* set the default para level */
479            state=NOT_SEEKING_STRONG;
480        } else {
481            state=SEEKING_STRONG_FOR_PARA;
482        }
483    } else {
484        pBiDi->paras[0].level=pBiDi->paraLevel;
485        state=NOT_SEEKING_STRONG;
486    }
487    /* count paragraphs and determine the paragraph level (P2..P3) */
488    /*
489     * see comment in ubidi.h:
490     * the UBIDI_DEFAULT_XXX values are designed so that
491     * their bit 0 alone yields the intended default
492     */
493    for( /* i=0 above */ ; i<originalLength; ) {
494        /* i is incremented by U16_NEXT */
495        U16_NEXT(text, i, originalLength, uchar);
496        flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
497        dirProps[i-1]=dirProp;
498        if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
499            flags|=DIRPROP_FLAG(BN);
500            dirProps[i-2]=BN;
501        }
502        if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
503            controlCount++;
504        if(dirProp==L) {
505            if(state==SEEKING_STRONG_FOR_PARA) {
506                pBiDi->paras[pBiDi->paraCount-1].level=0;
507                state=NOT_SEEKING_STRONG;
508            }
509            else if(state==SEEKING_STRONG_FOR_FSI) {
510                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
511                    dirProps[isolateStartStack[stackLast]]=LRI;
512                    flags|=DIRPROP_FLAG(LRI);
513                }
514                state=LOOKING_FOR_PDI;
515            }
516            lastStrong=L;
517            continue;
518        }
519        if(dirProp==R || dirProp==AL) {
520            if(state==SEEKING_STRONG_FOR_PARA) {
521                pBiDi->paras[pBiDi->paraCount-1].level=1;
522                state=NOT_SEEKING_STRONG;
523            }
524            else if(state==SEEKING_STRONG_FOR_FSI) {
525                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
526                    dirProps[isolateStartStack[stackLast]]=RLI;
527                    flags|=DIRPROP_FLAG(RLI);
528                }
529                state=LOOKING_FOR_PDI;
530            }
531            lastStrong=R;
532            if(dirProp==AL)
533                lastArabicPos=i-1;
534            continue;
535        }
536        if(dirProp>=FSI && dirProp<=RLI) {  /* FSI, LRI or RLI */
537            stackLast++;
538            if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
539                isolateStartStack[stackLast]=i-1;
540                previousStateStack[stackLast]=state;
541            }
542            if(dirProp==FSI)
543                state=SEEKING_STRONG_FOR_FSI;
544            else
545                state=LOOKING_FOR_PDI;
546            continue;
547        }
548        if(dirProp==PDI) {
549            if(state==SEEKING_STRONG_FOR_FSI) {
550                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
551                    dirProps[isolateStartStack[stackLast]]=LRI;
552                    flags|=DIRPROP_FLAG(LRI);
553                }
554            }
555            if(stackLast>=0) {
556                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
557                    state=previousStateStack[stackLast];
558                stackLast--;
559            }
560            continue;
561        }
562        if(dirProp==B) {
563            if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
564                continue;
565            pBiDi->paras[pBiDi->paraCount-1].limit=i;
566            if(isDefaultLevelInverse && lastStrong==R)
567                pBiDi->paras[pBiDi->paraCount-1].level=1;
568            if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
569                /* When streaming, we only process whole paragraphs
570                   thus some updates are only done on paragraph boundaries */
571                pBiDi->length=i;        /* i is index to next character */
572                pBiDi->controlCount=controlCount;
573            }
574            if(i<originalLength) {              /* B not last char in text */
575                pBiDi->paraCount++;
576                if(checkParaCount(pBiDi)==FALSE)    /* not enough memory for a new para entry */
577                    return FALSE;
578                if(isDefaultLevel) {
579                    pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
580                    state=SEEKING_STRONG_FOR_PARA;
581                    lastStrong=defaultParaLevel;
582                } else {
583                    pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
584                    state=NOT_SEEKING_STRONG;
585                }
586                stackLast=-1;
587            }
588            continue;
589        }
590    }
591    /* Ignore still open isolate sequences with overflow */
592    if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
593        stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
594        if(dirProps[previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL]]!=FSI)
595            state=LOOKING_FOR_PDI;
596    }
597    /* Resolve direction of still unresolved open FSI sequences */
598    while(stackLast>=0) {
599        if(state==SEEKING_STRONG_FOR_FSI) {
600            dirProps[isolateStartStack[stackLast]]=LRI;
601            flags|=DIRPROP_FLAG(LRI);
602        }
603        state=previousStateStack[stackLast];
604        stackLast--;
605    }
606    /* When streaming, ignore text after the last paragraph separator */
607    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
608        if(pBiDi->length<originalLength)
609            pBiDi->paraCount--;
610    } else {
611        pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
612        pBiDi->controlCount=controlCount;
613    }
614    /* For inverse bidi, default para direction is RTL if there is
615       a strong R or AL at either end of the paragraph */
616    if(isDefaultLevelInverse && lastStrong==R) {
617        pBiDi->paras[pBiDi->paraCount-1].level=1;
618    }
619    if(isDefaultLevel) {
620        pBiDi->paraLevel=pBiDi->paras[0].level;
621    }
622    /* The following is needed to resolve the text direction for default level
623       paragraphs containing no strong character */
624    for(i=0; i<pBiDi->paraCount; i++)
625        flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
626
627    if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
628        flags|=DIRPROP_FLAG(L);
629    }
630    pBiDi->flags=flags;
631    pBiDi->lastArabicPos=lastArabicPos;
632    return TRUE;
633}
634
635/* determine the paragraph level at position index */
636U_CFUNC UBiDiLevel
637ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
638    int32_t i;
639    for(i=0; i<pBiDi->paraCount; i++)
640        if(pindex<pBiDi->paras[i].limit)
641            break;
642    if(i>=pBiDi->paraCount)
643        i=pBiDi->paraCount-1;
644    return (UBiDiLevel)(pBiDi->paras[i].level);
645}
646
647/* Functions for handling paired brackets ----------------------------------- */
648
649/* In the isoRuns array, the first entry is used for text outside of any
650   isolate sequence.  Higher entries are used for each more deeply nested
651   isolate sequence. isoRunLast is the index of the last used entry.  The
652   openings array is used to note the data of opening brackets not yet
653   matched by a closing bracket, or matched but still susceptible to change
654   level.
655   Each isoRun entry contains the index of the first and
656   one-after-last openings entries for pending opening brackets it
657   contains.  The next openings entry to use is the one-after-last of the
658   most deeply nested isoRun entry.
659   isoRun entries also contain their current embedding level and the last
660   encountered strong character, since these will be needed to resolve
661   the level of paired brackets.  */
662
663static void
664bracketInit(UBiDi *pBiDi, BracketData *bd) {
665    bd->pBiDi=pBiDi;
666    bd->isoRunLast=0;
667    bd->isoRuns[0].start=0;
668    bd->isoRuns[0].limit=0;
669    bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
670    bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1;
671    bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0;
672    if(pBiDi->openingsMemory) {
673        bd->openings=pBiDi->openingsMemory;
674        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
675    } else {
676        bd->openings=bd->simpleOpenings;
677        bd->openingsCount=SIMPLE_OPENINGS_SIZE;
678    }
679    bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
680                         bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
681}
682
683/* paragraph boundary */
684static void
685bracketProcessB(BracketData *bd, UBiDiLevel level) {
686    bd->isoRunLast=0;
687    bd->isoRuns[0].limit=0;
688    bd->isoRuns[0].level=level;
689    bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=level&1;
690    bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0;
691}
692
693/* LRE, LRO, RLE, RLO, PDF */
694static void
695bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
696                       UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
697    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
698    DirProp *dirProps=bd->pBiDi->dirProps;
699    if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO)  /* after an isolate */
700        return;
701    if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)>
702       (contextLevel&~UBIDI_LEVEL_OVERRIDE))        /* not a PDF */
703        contextLevel=embeddingLevel;
704    pLastIsoRun->limit=pLastIsoRun->start;
705    pLastIsoRun->level=embeddingLevel;
706    pLastIsoRun->lastStrong=pLastIsoRun->contextDir=contextLevel&1;
707    pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=lastCcPos;
708}
709
710/* LRI or RLI */
711static void
712bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
713    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
714    int16_t lastLimit;
715    lastLimit=pLastIsoRun->limit;
716    bd->isoRunLast++;
717    pLastIsoRun++;
718    pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
719    pLastIsoRun->level=level;
720    pLastIsoRun->lastStrong=pLastIsoRun->contextDir=level&1;
721    pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=0;
722}
723
724/* PDI */
725static void
726bracketProcessPDI(BracketData *bd) {
727    bd->isoRunLast--;
728}
729
730/* newly found opening bracket: create an openings entry */
731static UBool                            /* return TRUE if success */
732bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
733    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
734    Opening *pOpening;
735    if(pLastIsoRun->limit>=bd->openingsCount) {  /* no available new entry */
736        UBiDi *pBiDi=bd->pBiDi;
737        if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
738            return FALSE;
739        if(bd->openings==bd->simpleOpenings)
740            uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
741                        SIMPLE_OPENINGS_SIZE * sizeof(Opening));
742        bd->openings=pBiDi->openingsMemory;     /* may have changed */
743        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
744    }
745    pOpening=&bd->openings[pLastIsoRun->limit];
746    pOpening->position=position;
747    pOpening->match=match;
748    pOpening->contextDir=pLastIsoRun->contextDir;
749    pOpening->contextPos=pLastIsoRun->contextPos;
750    pOpening->flags=0;
751    pLastIsoRun->limit++;
752    return TRUE;
753}
754
755/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
756static void
757fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
758    /* This function calls itself recursively */
759    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
760    Opening *qOpening;
761    DirProp *dirProps=bd->pBiDi->dirProps;
762    int32_t k, openingPosition, closingPosition;
763    for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
764        if(qOpening->match>=0)      /* not an N0c match */
765            continue;
766        if(newPropPosition<qOpening->contextPos)
767            break;
768        if(newPropPosition>=qOpening->position)
769            continue;
770        if(newProp==qOpening->contextDir)
771            break;
772        openingPosition=qOpening->position;
773        dirProps[openingPosition]=dirProps[newPropPosition];
774        closingPosition=-(qOpening->match);
775        dirProps[closingPosition]= newProp; /* can never be AL */
776        qOpening->match=0;                  /* prevent further changes */
777        fixN0c(bd, k, openingPosition, newProp);
778        fixN0c(bd, k, closingPosition, newProp);
779    }
780}
781
782/* handle strong characters, digits and candidates for closing brackets */
783static UBool                            /* return TRUE if success */
784bracketProcessChar(BracketData *bd, int32_t position, DirProp dirProp) {
785    IsoRun *pLastIsoRun;
786    Opening *pOpening, *qOpening;
787    DirProp *dirProps, newProp;
788    UBiDiDirection direction;
789    uint16_t flag;
790    int32_t i, k;
791    UBool stable;
792    UChar c, match;
793    dirProps=bd->pBiDi->dirProps;
794    if(DIRPROP_FLAG(dirProp)&MASK_STRONG_EN_AN) { /* L, R, AL, EN or AN */
795        pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
796        /* AN after R or AL becomes R or AL; after L or L+AN, it is kept as-is */
797        if(dirProp==AN && (pLastIsoRun->lastStrong==R || pLastIsoRun->lastStrong==AL))
798            dirProp=pLastIsoRun->lastStrong;
799        /* EN after L or L+AN becomes L; after R or AL, it becomes R or AL */
800        if(dirProp==EN) {
801            if(pLastIsoRun->lastStrong==L || pLastIsoRun->lastStrong==AN) {
802                dirProp=L;
803                if(!bd->isNumbersSpecial)
804                    dirProps[position]=ENL;
805            }
806            else {
807                dirProp=pLastIsoRun->lastStrong;    /* may be R or AL */
808                if(!bd->isNumbersSpecial)
809                    dirProps[position]= dirProp==AL ? AN : ENR;
810            }
811        }
812        pLastIsoRun->lastStrong=dirProp;
813        pLastIsoRun->contextDir=DIR_FROM_STRONG(dirProp);
814        pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=position;
815        if(dirProp==AL || dirProp==AN)
816            dirProp=R;
817        flag=DIRPROP_FLAG(dirProp);
818        /* strong characters found after an unmatched opening bracket
819           must be noted for possibly applying N0b */
820        for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
821            bd->openings[i].flags|=flag;
822        return TRUE;
823    }
824    if(dirProp!=ON)
825        return TRUE;
826    /* First see if it is a matching closing bracket. Hopefully, this is more
827       efficient than checking if it is a closing bracket at all */
828    c=bd->pBiDi->text[position];
829    pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
830    for(i=pLastIsoRun->limit-1; i>=pLastIsoRun->start; i--) {
831        if(bd->openings[i].match!=c)
832            continue;
833        /* We have a match */
834        pOpening=&bd->openings[i];
835        direction=pLastIsoRun->level&1;
836        stable=TRUE;            /* assume stable until proved otherwise */
837
838        /* The stable flag is set when brackets are paired and their
839           level is resolved and cannot be changed by what will be
840           found later in the source string.
841           An unstable match can occur only when applying N0c, where
842           the resolved level depends on the preceding context, and
843           this context may be affected by text occurring later.
844           Example: RTL paragraph containing:  abc[(latin) HEBREW]
845           When the closing parenthesis is encountered, it appears
846           that N0c1 must be applied since 'abc' sets an opposite
847           direction context and both parentheses receive level 2.
848           However, when the closing square bracket is processed,
849           N0b applies because of 'HEBREW' being included within the
850           brackets, thus the square brackets are treated like R and
851           receive level 1. However, this changes the preceding
852           context of the opening parenthesis, and it now appears
853           that N0c2 must be applied to the parentheses rather than
854           N0c1. */
855
856        if((direction==0 && pOpening->flags&FOUND_L) ||
857           (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
858            newProp=direction;
859        }
860        else if(pOpening->flags&(FOUND_L|FOUND_R)) {    /* N0c */
861            if(direction!=pOpening->contextDir) {
862                newProp=pOpening->contextDir;           /* N0c1 */
863                /* it is stable if there is no preceding text or in
864                   conditions too complicated and not worth checking */
865                stable=(i==pLastIsoRun->start);
866            }
867            else
868                newProp=direction;                      /* N0c2 */
869        }
870        else {
871            newProp=BN;                                 /* N0d */
872        }
873        if(newProp!=BN) {
874            dirProps[pOpening->position]=newProp;
875            dirProps[position]=newProp;
876            pLastIsoRun->contextDir=newProp;
877            pLastIsoRun->contextPos=position;
878        }
879        /* Update nested N0c pairs that may be affected */
880        if(newProp==direction)
881            fixN0c(bd, i, pOpening->position, newProp);
882        if(stable) {
883            pLastIsoRun->limit=i;   /* forget any brackets nested within this pair */
884            /* remove lower located synonyms if any */
885            while(pLastIsoRun->limit>pLastIsoRun->start &&
886                  bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
887                pLastIsoRun->limit--;
888        }
889        else {
890            pOpening->match=-position;
891            /* neutralize lower located synonyms if any */
892            k=i-1;
893            while(k>=pLastIsoRun->start &&
894                  bd->openings[k].position==pOpening->position)
895                bd->openings[k--].match=0;
896            /* neutralize any unmatched opening between the current pair;
897               this will also neutralize higher located synonyms if any */
898            for(k=i+1; k<pLastIsoRun->limit; k++) {
899                qOpening=&bd->openings[k];
900                if(qOpening->position>=position)
901                    break;
902                if(qOpening->match>0)
903                    qOpening->match=0;
904            }
905        }
906        return TRUE;
907    }
908    /* We get here only if the ON character was not a matching closing bracket */
909    /* Now see if it is an opening bracket */
910    match=u_getBidiPairedBracket(c);    /* get the matching char */
911    if(match==c)                        /* if no matching char */
912        return TRUE;
913    if(ubidi_getPairedBracketType(bd->pBiDi->bdp, c)!=U_BPT_OPEN)
914        return TRUE;                    /* not an opening bracket */
915    /* special case: process synonyms
916       create an opening entry for each synonym */
917    if(match==0x232A) {     /* RIGHT-POINTING ANGLE BRACKET */
918        if(!bracketAddOpening(bd, 0x3009, position))
919            return FALSE;
920    }
921    else if(match==0x3009) {         /* RIGHT ANGLE BRACKET */
922        if(!bracketAddOpening(bd, 0x232A, position))
923            return FALSE;
924    }
925    return bracketAddOpening(bd, match, position);
926}
927
928/* perform (X1)..(X9) ------------------------------------------------------- */
929
930/* determine if the text is mixed-directional or single-directional */
931static UBiDiDirection
932directionFromFlags(UBiDi *pBiDi) {
933    Flags flags=pBiDi->flags;
934    /* if the text contains AN and neutrals, then some neutrals may become RTL */
935    if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
936        return UBIDI_LTR;
937    } else if(!(flags&MASK_LTR)) {
938        return UBIDI_RTL;
939    } else {
940        return UBIDI_MIXED;
941    }
942}
943
944/*
945 * Resolve the explicit levels as specified by explicit embedding codes.
946 * Recalculate the flags to have them reflect the real properties
947 * after taking the explicit embeddings into account.
948 *
949 * The BiDi algorithm is designed to result in the same behavior whether embedding
950 * levels are externally specified (from "styled text", supposedly the preferred
951 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
952 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
953 * However, in a real implementation, the removal of these codes and their index
954 * positions in the plain text is undesirable since it would result in
955 * reallocated, reindexed text.
956 * Instead, this implementation leaves the codes in there and just ignores them
957 * in the subsequent processing.
958 * In order to get the same reordering behavior, positions with a BN or a not-isolate
959 * explicit embedding code just get the same level assigned as the last "real"
960 * character.
961 *
962 * Some implementations, not this one, then overwrite some of these
963 * directionality properties at "real" same-level-run boundaries by
964 * L or R codes so that the resolution of weak types can be performed on the
965 * entire paragraph at once instead of having to parse it once more and
966 * perform that resolution on same-level-runs.
967 * This limits the scope of the implicit rules in effectively
968 * the same way as the run limits.
969 *
970 * Instead, this implementation does not modify these codes, except for
971 * paired brackets whose properties (ON) may be replaced by L or R.
972 * On one hand, the paragraph has to be scanned for same-level-runs, but
973 * on the other hand, this saves another loop to reset these codes,
974 * or saves making and modifying a copy of dirProps[].
975 *
976 *
977 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
978 *
979 *
980 * Handling the stack of explicit levels (Xn):
981 *
982 * With the BiDi stack of explicit levels, as pushed with each
983 * LRE, RLE, LRO, RLO, LRI, RLI and FSO and popped with each PDF and PDI,
984 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
985 *
986 * In order to have a correct push-pop semantics even in the case of overflows,
987 * overflow counters and a valid isolate counter are used as described in UAX#9
988 * section 3.3.2 "Explicit Levels and Directions".
989 *
990 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
991 */
992static UBiDiDirection
993resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
994    DirProp *dirProps=pBiDi->dirProps;
995    UBiDiLevel *levels=pBiDi->levels;
996    const UChar *text=pBiDi->text;
997
998    int32_t i=0, length=pBiDi->length;
999    Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
1000    DirProp dirProp;
1001    UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1002    UBiDiDirection direction;
1003    pBiDi->isolateCount=0;
1004
1005    if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1006
1007    /* determine if the text is mixed-directional or single-directional */
1008    direction=directionFromFlags(pBiDi);
1009
1010    /* we may not need to resolve any explicit levels */
1011    if((direction!=UBIDI_MIXED)) {
1012        /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1013        return direction;
1014    }
1015    if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1016        /* inverse BiDi: mixed, but all characters are at the same embedding level */
1017        /* set all levels to the paragraph level */
1018        int32_t paraIndex, start, limit;
1019        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1020            if(paraIndex==0)
1021                start=0;
1022            else
1023                start=pBiDi->paras[paraIndex-1].limit;
1024            limit=pBiDi->paras[paraIndex].limit;
1025            level=pBiDi->paras[paraIndex].level;
1026            for(i=start; i<limit; i++)
1027                levels[i]=level;
1028        }
1029        return direction;   /* no bracket matching for inverse BiDi */
1030    }
1031    if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1032        /* no embeddings, set all levels to the paragraph level */
1033        /* we still have to perform bracket matching */
1034        int32_t paraIndex, start, limit;
1035        BracketData bracketData;
1036        bracketInit(pBiDi, &bracketData);
1037        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1038            if(paraIndex==0)
1039                start=0;
1040            else
1041                start=pBiDi->paras[paraIndex-1].limit;
1042            limit=pBiDi->paras[paraIndex].limit;
1043            level=pBiDi->paras[paraIndex].level;
1044            for(i=start; i<limit; i++) {
1045                levels[i]=level;
1046                dirProp=dirProps[i];
1047                if(dirProp==B) {
1048                    if((i+1)<length) {
1049                        if(text[i]==CR && text[i+1]==LF)
1050                            continue;   /* skip CR when followed by LF */
1051                        bracketProcessB(&bracketData, level);
1052                    }
1053                    continue;
1054                }
1055                if(!bracketProcessChar(&bracketData, i, dirProp)) {
1056                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1057                    return UBIDI_LTR;
1058                }
1059            }
1060        }
1061        return direction;
1062    }
1063    {
1064        /* continue to perform (Xn) */
1065
1066        /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1067        /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1068        UBiDiLevel embeddingLevel=level, newLevel;
1069        UBiDiLevel previousLevel=level;     /* previous level for regular (not CC) characters */
1070        int32_t lastCcPos=0;                /* index of last effective LRx,RLx, PDx */
1071
1072        uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2];   /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1073                                                        but we need one more entry as base */
1074        uint32_t stackLast=0;
1075        int32_t overflowIsolateCount=0;
1076        int32_t overflowEmbeddingCount=0;
1077        int32_t validIsolateCount=0;
1078        BracketData bracketData;
1079        bracketInit(pBiDi, &bracketData);
1080        stack[0]=level;     /* initialize base entry to para level, no override, no isolate */
1081
1082        /* recalculate the flags */
1083        flags=0;
1084
1085        for(i=0; i<length; ++i) {
1086            dirProp=dirProps[i];
1087            switch(dirProp) {
1088            case LRE:
1089            case RLE:
1090            case LRO:
1091            case RLO:
1092                /* (X2, X3, X4, X5) */
1093                flags|=DIRPROP_FLAG(BN);
1094                if (dirProp==LRE || dirProp==LRO)
1095                    newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
1096                else
1097                    newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
1098                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1099                                                         overflowEmbeddingCount==0) {
1100                    lastCcPos=i;
1101                    embeddingLevel=newLevel;
1102                    if(dirProp==LRO || dirProp==RLO)
1103                        embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1104                    stackLast++;
1105                    stack[stackLast]=embeddingLevel;
1106                    /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1107                       since this has already been done for newLevel which is
1108                       the source for embeddingLevel.
1109                     */
1110                } else {
1111                    dirProps[i]|=IGNORE_CC;
1112                    if(overflowIsolateCount==0)
1113                        overflowEmbeddingCount++;
1114                }
1115                break;
1116            case PDF:
1117                /* (X7) */
1118                flags|=DIRPROP_FLAG(BN);
1119                /* handle all the overflow cases first */
1120                if(overflowIsolateCount) {
1121                    dirProps[i]|=IGNORE_CC;
1122                    break;
1123                }
1124                if(overflowEmbeddingCount) {
1125                    dirProps[i]|=IGNORE_CC;
1126                    overflowEmbeddingCount--;
1127                    break;
1128                }
1129                if(stackLast>0 && stack[stackLast]<ISOLATE) {   /* not an isolate entry */
1130                    lastCcPos=i;
1131                    stackLast--;
1132                    embeddingLevel=(UBiDiLevel)stack[stackLast];
1133                } else
1134                    dirProps[i]|=IGNORE_CC;
1135                break;
1136            case LRI:
1137            case RLI:
1138                if(embeddingLevel!=previousLevel) {
1139                    bracketProcessBoundary(&bracketData, lastCcPos,
1140                                           previousLevel, embeddingLevel);
1141                    previousLevel=embeddingLevel;
1142                }
1143                /* (X5a, X5b) */
1144                flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
1145                level=embeddingLevel;
1146                if(dirProp==LRI)
1147                    newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
1148                else
1149                    newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
1150                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1151                                                         overflowEmbeddingCount==0) {
1152                    lastCcPos=i;
1153                    previousLevel=embeddingLevel;
1154                    validIsolateCount++;
1155                    if(validIsolateCount>pBiDi->isolateCount)
1156                        pBiDi->isolateCount=validIsolateCount;
1157                    embeddingLevel=newLevel;
1158                    stackLast++;
1159                    stack[stackLast]=embeddingLevel+ISOLATE;
1160                    bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1161                } else {
1162                    dirProps[i]|=IGNORE_CC;
1163                    overflowIsolateCount++;
1164                }
1165                break;
1166            case PDI:
1167                if(embeddingLevel!=previousLevel) {
1168                    bracketProcessBoundary(&bracketData, lastCcPos,
1169                                           previousLevel, embeddingLevel);
1170                }
1171                /* (X6a) */
1172                if(overflowIsolateCount) {
1173                    dirProps[i]|=IGNORE_CC;
1174                    overflowIsolateCount--;
1175                }
1176                else if(validIsolateCount) {
1177                    lastCcPos=i;
1178                    overflowEmbeddingCount=0;
1179                    while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1180                        stackLast--;                /* until the last isolate entry */
1181                    stackLast--;                    /* pop also the last isolate entry */
1182                    validIsolateCount--;
1183                    bracketProcessPDI(&bracketData);
1184                } else
1185                    dirProps[i]|=IGNORE_CC;
1186                embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1187                previousLevel=level=embeddingLevel;
1188                flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
1189                break;
1190            case B:
1191                level=GET_PARALEVEL(pBiDi, i);
1192                if((i+1)<length) {
1193                    if(text[i]==CR && text[i+1]==LF)
1194                        break;          /* skip CR when followed by LF */
1195                    overflowEmbeddingCount=overflowIsolateCount=0;
1196                    validIsolateCount=0;
1197                    stackLast=0;
1198                    stack[0]=level; /* initialize base entry to para level, no override, no isolate */
1199                    previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1200                    bracketProcessB(&bracketData, embeddingLevel);
1201                }
1202                flags|=DIRPROP_FLAG(B);
1203                break;
1204            case BN:
1205                /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1206                /* they will get their levels set correctly in adjustWSLevels() */
1207                flags|=DIRPROP_FLAG(BN);
1208                break;
1209            default:
1210                /* all other types get the "real" level */
1211                level=embeddingLevel;
1212                if(embeddingLevel!=previousLevel) {
1213                    bracketProcessBoundary(&bracketData, lastCcPos,
1214                                           previousLevel, embeddingLevel);
1215                    previousLevel=embeddingLevel;
1216                }
1217                if(level&UBIDI_LEVEL_OVERRIDE)
1218                    flags|=DIRPROP_FLAG_LR(level);
1219                else
1220                    flags|=DIRPROP_FLAG(dirProp);
1221                if(!bracketProcessChar(&bracketData, i, dirProp))
1222                    return -1;
1223                break;
1224            }
1225
1226            /*
1227             * We need to set reasonable levels even on BN codes and
1228             * explicit codes because we will later look at same-level runs (X10).
1229             */
1230            levels[i]=level;
1231            if(i>0 && levels[i-1]!=level) {
1232                flags|=DIRPROP_FLAG_MULTI_RUNS;
1233                if(level&UBIDI_LEVEL_OVERRIDE)
1234                    flags|=DIRPROP_FLAG_O(level);
1235                else
1236                    flags|=DIRPROP_FLAG_E(level);
1237            }
1238            if(DIRPROP_FLAG(dirProp)&MASK_ISO)
1239                level=embeddingLevel;
1240        }
1241        if(flags&MASK_EMBEDDING) {
1242            flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1243        }
1244        if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
1245            flags|=DIRPROP_FLAG(L);
1246        }
1247
1248        /* subsequently, ignore the explicit codes and BN (X9) */
1249
1250        /* again, determine if the text is mixed-directional or single-directional */
1251        pBiDi->flags=flags;
1252        direction=directionFromFlags(pBiDi);
1253    }
1254    return direction;
1255}
1256
1257/*
1258 * Use a pre-specified embedding levels array:
1259 *
1260 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1261 * ignore all explicit codes (X9),
1262 * and check all the preset levels.
1263 *
1264 * Recalculate the flags to have them reflect the real properties
1265 * after taking the explicit embeddings into account.
1266 */
1267static UBiDiDirection
1268checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1269    DirProp *dirProps=pBiDi->dirProps;
1270    DirProp dirProp;
1271    UBiDiLevel *levels=pBiDi->levels;
1272    int32_t isolateCount=0;
1273
1274    int32_t i, length=pBiDi->length;
1275    Flags flags=0;  /* collect all directionalities in the text */
1276    UBiDiLevel level;
1277    pBiDi->isolateCount=0;
1278
1279    for(i=0; i<length; ++i) {
1280        level=levels[i];
1281        dirProp=dirProps[i];
1282        if(dirProp==LRI || dirProp==RLI) {
1283            isolateCount++;
1284            if(isolateCount>pBiDi->isolateCount)
1285                pBiDi->isolateCount=isolateCount;
1286        }
1287        else if(dirProp==PDI)
1288            isolateCount--;
1289        else if(dirProp==B)
1290            isolateCount=0;
1291        if(level&UBIDI_LEVEL_OVERRIDE) {
1292            /* keep the override flag in levels[i] but adjust the flags */
1293            level&=~UBIDI_LEVEL_OVERRIDE;     /* make the range check below simpler */
1294            flags|=DIRPROP_FLAG_O(level);
1295        } else {
1296            /* set the flags */
1297            flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
1298        }
1299        if((level<GET_PARALEVEL(pBiDi, i) &&
1300            !((0==level)&&(dirProp==B))) ||
1301           (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
1302            /* level out of bounds */
1303            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1304            return UBIDI_LTR;
1305        }
1306    }
1307    if(flags&MASK_EMBEDDING) {
1308        flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1309    }
1310
1311    /* determine if the text is mixed-directional or single-directional */
1312    pBiDi->flags=flags;
1313    return directionFromFlags(pBiDi);
1314}
1315
1316/******************************************************************
1317 The Properties state machine table
1318*******************************************************************
1319
1320 All table cells are 8 bits:
1321      bits 0..4:  next state
1322      bits 5..7:  action to perform (if > 0)
1323
1324 Cells may be of format "n" where n represents the next state
1325 (except for the rightmost column).
1326 Cells may also be of format "s(x,y)" where x represents an action
1327 to perform and y represents the next state.
1328
1329*******************************************************************
1330 Definitions and type for properties state table
1331*******************************************************************
1332*/
1333#define IMPTABPROPS_COLUMNS 16
1334#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
1335#define GET_STATEPROPS(cell) ((cell)&0x1f)
1336#define GET_ACTIONPROPS(cell) ((cell)>>5)
1337#define s(action, newState) ((uint8_t)(newState+(action<<5)))
1338
1339static const uint8_t groupProp[] =          /* dirProp regrouped */
1340{
1341/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
1342    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
1343};
1344enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
1345
1346/******************************************************************
1347
1348      PROPERTIES  STATE  TABLE
1349
1350 In table impTabProps,
1351      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1352      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1353      - the Res column is the reduced property assigned to a run
1354
1355 Action 1: process current run1, init new run1
1356        2: init new run2
1357        3: process run1, process run2, init new run1
1358        4: process run1, set run1=run2, init new run2
1359
1360 Notes:
1361  1) This table is used in resolveImplicitLevels().
1362  2) This table triggers actions when there is a change in the Bidi
1363     property of incoming characters (action 1).
1364  3) Most such property sequences are processed immediately (in
1365     fact, passed to processPropertySeq().
1366  4) However, numbers are assembled as one sequence. This means
1367     that undefined situations (like CS following digits, until
1368     it is known if the next char will be a digit) are held until
1369     following chars define them.
1370     Example: digits followed by CS, then comes another CS or ON;
1371              the digits will be processed, then the CS assigned
1372              as the start of an ON sequence (action 3).
1373  5) There are cases where more than one sequence must be
1374     processed, for instance digits followed by CS followed by L:
1375     the digits must be processed as one sequence, and the CS
1376     must be processed as an ON sequence, all this before starting
1377     assembling chars for the opening L sequence.
1378
1379
1380*/
1381static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1382{
1383/*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,   ENL ,   ENR , Res */
1384/* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,    18 ,    21 , DirProp_ON },
1385/* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),s(1,18),s(1,21),  DirProp_L },
1386/* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),s(1,18),s(1,21),  DirProp_R },
1387/* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,s(1,18),s(1,21),  DirProp_R },
1388/* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),    18 ,    21 , DirProp_EN },
1389/* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1390/* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),    18 ,    21 , DirProp_AN },
1391/* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1392/* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1393/* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),    18 ,    21 , DirProp_ON },
1394/*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),    18 ,    21 , DirProp_EN },
1395/*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),    18 ,    21 , DirProp_EN },
1396/*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1397/*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),    18 ,    21 , DirProp_AN },
1398/*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1399/*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_S },
1400/*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),s(1,18),s(1,21),  DirProp_S },
1401/*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_B },
1402/*18 ENL         */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19),    20 ,s(2,19),    18 ,    18 , s(1,3),    18 ,    21 ,  DirProp_L },
1403/*19 ENL+ES/CS   */ { s(3,1), s(3,2),    18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    19 , s(4,7), s(3,3),    18 ,    21 ,  DirProp_L },
1404/*20 ENL+ET      */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    20 , s(1,7),    20 ,    20 , s(1,3),    18 ,    21 ,  DirProp_L },
1405/*21 ENR         */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22),    23 ,s(2,22),    21 ,    21 , s(1,3),    18 ,    21 , DirProp_AN },
1406/*22 ENR+ES/CS   */ { s(3,1), s(3,2),    21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    22 , s(4,7), s(3,3),    18 ,    21 , DirProp_AN },
1407/*23 ENR+ET      */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    23 , s(1,7),    23 ,    23 , s(1,3),    18 ,    21 , DirProp_AN }
1408};
1409
1410/*  we must undef macro s because the levels table have a different
1411 *  structure (4 bits for action and 4 bits for next state.
1412 */
1413#undef s
1414
1415/******************************************************************
1416 The levels state machine tables
1417*******************************************************************
1418
1419 All table cells are 8 bits:
1420      bits 0..3:  next state
1421      bits 4..7:  action to perform (if > 0)
1422
1423 Cells may be of format "n" where n represents the next state
1424 (except for the rightmost column).
1425 Cells may also be of format "s(x,y)" where x represents an action
1426 to perform and y represents the next state.
1427
1428 This format limits each table to 16 states each and to 15 actions.
1429
1430*******************************************************************
1431 Definitions and type for levels state tables
1432*******************************************************************
1433*/
1434#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
1435#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
1436#define GET_STATE(cell) ((cell)&0x0f)
1437#define GET_ACTION(cell) ((cell)>>4)
1438#define s(action, newState) ((uint8_t)(newState+(action<<4)))
1439
1440typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1441typedef uint8_t ImpAct[];
1442
1443/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1444 * instead of having a pair of ImpTab and a pair of ImpAct.
1445 */
1446typedef struct ImpTabPair {
1447    const void * pImpTab[2];
1448    const void * pImpAct[2];
1449} ImpTabPair;
1450
1451/******************************************************************
1452
1453      LEVELS  STATE  TABLES
1454
1455 In all levels state tables,
1456      - state 0 is the initial state
1457      - the Res column is the increment to add to the text level
1458        for this property sequence.
1459
1460 The impAct arrays for each table of a pair map the local action
1461 numbers of the table to the total list of actions. For instance,
1462 action 2 in a given table corresponds to the action number which
1463 appears in entry [2] of the impAct array for that table.
1464 The first entry of all impAct arrays must be 0.
1465
1466 Action 1: init conditional sequence
1467        2: prepend conditional sequence to current sequence
1468        3: set ON sequence to new level - 1
1469        4: init EN/AN/ON sequence
1470        5: fix EN/AN/ON sequence followed by R
1471        6: set previous level sequence to level 2
1472
1473 Notes:
1474  1) These tables are used in processPropertySeq(). The input
1475     is property sequences as determined by resolveImplicitLevels.
1476  2) Most such property sequences are processed immediately
1477     (levels are assigned).
1478  3) However, some sequences cannot be assigned a final level till
1479     one or more following sequences are received. For instance,
1480     ON following an R sequence within an even-level paragraph.
1481     If the following sequence is R, the ON sequence will be
1482     assigned basic run level+1, and so will the R sequence.
1483  4) S is generally handled like ON, since its level will be fixed
1484     to paragraph level in adjustWSLevels().
1485
1486*/
1487
1488static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
1489/*  In this table, conditional sequences receive the higher possible level
1490    until proven otherwise.
1491*/
1492{
1493/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1494/* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
1495/* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1496/* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
1497/* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
1498/* 4 : R+ON       */ { s(2,0),     1 ,     3 ,     3 ,     4 ,     4 , s(2,0),  1 },
1499/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0),     2 ,     5 ,     5 , s(2,0),  1 }
1500};
1501static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
1502/*  In this table, conditional sequences receive the lower possible level
1503    until proven otherwise.
1504*/
1505{
1506/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1507/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1508/* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
1509/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1510/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
1511/* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
1512/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
1513};
1514static const ImpAct impAct0 = {0,1,2,3,4,5,6};
1515static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1516                                           &impTabR_DEFAULT},
1517                                          {&impAct0, &impAct0}};
1518
1519static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
1520/*  In this table, conditional sequences receive the higher possible level
1521    until proven otherwise.
1522*/
1523{
1524/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1525/* 0 : init       */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  0 },
1526/* 1 : L+EN/AN    */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  2 },
1527/* 2 : R          */ {     0 ,     2 ,    4 ,      4 , s(1,3),     0 ,     0 ,  1 },
1528/* 3 : R+ON       */ { s(2,0),     2 ,    4 ,      4 ,     3 ,     3 , s(2,0),  1 },
1529/* 4 : R+EN/AN    */ {     0 ,     2 ,    4 ,      4 , s(1,3), s(1,3),     0 ,  2 }
1530  };
1531static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1532                                                   &impTabR_DEFAULT},
1533                                                  {&impAct0, &impAct0}};
1534
1535static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1536/*  In this table, EN/AN+ON sequences receive levels as if associated with R
1537    until proven that there is L or sor/eor on both sides. AN is handled like EN.
1538*/
1539{
1540/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1541/* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1542/* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
1543/* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
1544/* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
1545/* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
1546/* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
1547};
1548static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1549/*  In this table, EN/AN+ON sequences receive levels as if associated with R
1550    until proven that there is L on both sides. AN is handled like EN.
1551*/
1552{
1553/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1554/* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1555/* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1556/* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
1557/* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
1558/* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
1559};
1560static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1561                        {&impTabL_GROUP_NUMBERS_WITH_R,
1562                         &impTabR_GROUP_NUMBERS_WITH_R},
1563                        {&impAct0, &impAct0}};
1564
1565
1566static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1567/*  This table is identical to the Default LTR table except that EN and AN are
1568    handled like L.
1569*/
1570{
1571/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1572/* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
1573/* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
1574/* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
1575/* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
1576/* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
1577/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
1578};
1579static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1580/*  This table is identical to the Default RTL table except that EN and AN are
1581    handled like L.
1582*/
1583{
1584/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1585/* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1586/* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
1587/* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1588/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
1589/* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
1590/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
1591};
1592static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1593                        {&impTabL_INVERSE_NUMBERS_AS_L,
1594                         &impTabR_INVERSE_NUMBERS_AS_L},
1595                        {&impAct0, &impAct0}};
1596
1597static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
1598/*  In this table, conditional sequences receive the lower possible level
1599    until proven otherwise.
1600*/
1601{
1602/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1603/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1604/* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
1605/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1606/* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
1607/* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
1608/* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
1609/* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
1610};
1611static const ImpAct impAct1 = {0,1,11,12};
1612/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1613 */
1614static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1615                        {&impTabL_DEFAULT,
1616                         &impTabR_INVERSE_LIKE_DIRECT},
1617                        {&impAct0, &impAct1}};
1618
1619static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1620/*  The case handled in this table is (visually):  R EN L
1621*/
1622{
1623/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1624/* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
1625/* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
1626/* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
1627/* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
1628/* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
1629/* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
1630/* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
1631};
1632static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1633/*  The cases handled in this table are (visually):  R EN L
1634                                                     R L AN L
1635*/
1636{
1637/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1638/* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1639/* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
1640/* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
1641/* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
1642/* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
1643/* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
1644/* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
1645};
1646static const ImpAct impAct2 = {0,1,7,8,9,10};
1647static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1648                        {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1649                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1650                        {&impAct0, &impAct2}};
1651
1652static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1653                        {&impTabL_NUMBERS_SPECIAL,
1654                         &impTabR_INVERSE_LIKE_DIRECT},
1655                        {&impAct0, &impAct1}};
1656
1657static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1658/*  The case handled in this table is (visually):  R EN L
1659*/
1660{
1661/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1662/* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1663/* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
1664/* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
1665/* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
1666/* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
1667};
1668static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1669                        {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1670                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1671                        {&impAct0, &impAct2}};
1672
1673#undef s
1674
1675typedef struct {
1676    const ImpTab * pImpTab;             /* level table pointer          */
1677    const ImpAct * pImpAct;             /* action map array             */
1678    int32_t startON;                    /* start of ON sequence         */
1679    int32_t startL2EN;                  /* start of level 2 sequence    */
1680    int32_t lastStrongRTL;              /* index of last found R or AL  */
1681    int32_t state;                      /* current state                */
1682    int32_t runStart;                   /* start position of the run    */
1683    UBiDiLevel runLevel;                /* run level before implicit solving */
1684} LevState;
1685
1686/*------------------------------------------------------------------------*/
1687
1688static void
1689addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1690  /* param pos:     position where to insert
1691     param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1692  */
1693{
1694#define FIRSTALLOC  10
1695    Point point;
1696    InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1697
1698    if (pInsertPoints->capacity == 0)
1699    {
1700        pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
1701        if (pInsertPoints->points == NULL)
1702        {
1703            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1704            return;
1705        }
1706        pInsertPoints->capacity=FIRSTALLOC;
1707    }
1708    if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1709    {
1710        void * savePoints=pInsertPoints->points;
1711        pInsertPoints->points=uprv_realloc(pInsertPoints->points,
1712                                           pInsertPoints->capacity*2*sizeof(Point));
1713        if (pInsertPoints->points == NULL)
1714        {
1715            pInsertPoints->points=savePoints;
1716            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1717            return;
1718        }
1719        else  pInsertPoints->capacity*=2;
1720    }
1721    point.pos=pos;
1722    point.flag=flag;
1723    pInsertPoints->points[pInsertPoints->size]=point;
1724    pInsertPoints->size++;
1725#undef FIRSTALLOC
1726}
1727
1728/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1729
1730/*
1731 * This implementation of the (Wn) rules applies all rules in one pass.
1732 * In order to do so, it needs a look-ahead of typically 1 character
1733 * (except for W5: sequences of ET) and keeps track of changes
1734 * in a rule Wp that affect a later Wq (p<q).
1735 *
1736 * The (Nn) and (In) rules are also performed in that same single loop,
1737 * but effectively one iteration behind for white space.
1738 *
1739 * Since all implicit rules are performed in one step, it is not necessary
1740 * to actually store the intermediate directional properties in dirProps[].
1741 */
1742
1743static void
1744processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1745                   int32_t start, int32_t limit) {
1746    uint8_t cell, oldStateSeq, actionSeq;
1747    const ImpTab * pImpTab=pLevState->pImpTab;
1748    const ImpAct * pImpAct=pLevState->pImpAct;
1749    UBiDiLevel * levels=pBiDi->levels;
1750    UBiDiLevel level, addLevel;
1751    InsertPoints * pInsertPoints;
1752    int32_t start0, k;
1753
1754    start0=start;                           /* save original start position */
1755    oldStateSeq=(uint8_t)pLevState->state;
1756    cell=(*pImpTab)[oldStateSeq][_prop];
1757    pLevState->state=GET_STATE(cell);       /* isolate the new state */
1758    actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1759    addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1760
1761    if(actionSeq) {
1762        switch(actionSeq) {
1763        case 1:                         /* init ON seq */
1764            pLevState->startON=start0;
1765            break;
1766
1767        case 2:                         /* prepend ON seq to current seq */
1768            start=pLevState->startON;
1769            break;
1770
1771        case 3:                         /* L or S after possible relevant EN/AN */
1772            /* check if we had EN after R/AL */
1773            if (pLevState->startL2EN >= 0) {
1774                addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1775            }
1776            pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
1777            /* check if we had any relevant EN/AN after R/AL */
1778            pInsertPoints=&(pBiDi->insertPoints);
1779            if ((pInsertPoints->capacity == 0) ||
1780                (pInsertPoints->size <= pInsertPoints->confirmed))
1781            {
1782                /* nothing, just clean up */
1783                pLevState->lastStrongRTL=-1;
1784                /* check if we have a pending conditional segment */
1785                level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1786                if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
1787                    start=pLevState->startON;   /* reset to basic run level */
1788                }
1789                if (_prop == DirProp_S)                /* add LRM before S */
1790                {
1791                    addPoint(pBiDi, start0, LRM_BEFORE);
1792                    pInsertPoints->confirmed=pInsertPoints->size;
1793                }
1794                break;
1795            }
1796            /* reset previous RTL cont to level for LTR text */
1797            for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1798            {
1799                /* reset odd level, leave runLevel+2 as is */
1800                levels[k]=(levels[k] - 2) & ~1;
1801            }
1802            /* mark insert points as confirmed */
1803            pInsertPoints->confirmed=pInsertPoints->size;
1804            pLevState->lastStrongRTL=-1;
1805            if (_prop == DirProp_S)            /* add LRM before S */
1806            {
1807                addPoint(pBiDi, start0, LRM_BEFORE);
1808                pInsertPoints->confirmed=pInsertPoints->size;
1809            }
1810            break;
1811
1812        case 4:                         /* R/AL after possible relevant EN/AN */
1813            /* just clean up */
1814            pInsertPoints=&(pBiDi->insertPoints);
1815            if (pInsertPoints->capacity > 0)
1816                /* remove all non confirmed insert points */
1817                pInsertPoints->size=pInsertPoints->confirmed;
1818            pLevState->startON=-1;
1819            pLevState->startL2EN=-1;
1820            pLevState->lastStrongRTL=limit - 1;
1821            break;
1822
1823        case 5:                         /* EN/AN after R/AL + possible cont */
1824            /* check for real AN */
1825            if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
1826                (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1827            {
1828                /* real AN */
1829                if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1830                {
1831                    /* just note the righmost digit as a strong RTL */
1832                    pLevState->lastStrongRTL=limit - 1;
1833                    break;
1834                }
1835                if (pLevState->startL2EN >= 0)  /* after EN, no AN */
1836                {
1837                    addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1838                    pLevState->startL2EN=-2;
1839                }
1840                /* note AN */
1841                addPoint(pBiDi, start0, LRM_BEFORE);
1842                break;
1843            }
1844            /* if first EN/AN after R/AL */
1845            if (pLevState->startL2EN == -1) {
1846                pLevState->startL2EN=start0;
1847            }
1848            break;
1849
1850        case 6:                         /* note location of latest R/AL */
1851            pLevState->lastStrongRTL=limit - 1;
1852            pLevState->startON=-1;
1853            break;
1854
1855        case 7:                         /* L after R+ON/EN/AN */
1856            /* include possible adjacent number on the left */
1857            for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1858            if(k>=0) {
1859                addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
1860                pInsertPoints=&(pBiDi->insertPoints);
1861                pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
1862            }
1863            pLevState->startON=start0;
1864            break;
1865
1866        case 8:                         /* AN after L */
1867            /* AN numbers between L text on both sides may be trouble. */
1868            /* tentatively bracket with LRMs; will be confirmed if followed by L */
1869            addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
1870            addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
1871            break;
1872
1873        case 9:                         /* R after L+ON/EN/AN */
1874            /* false alert, infirm LRMs around previous AN */
1875            pInsertPoints=&(pBiDi->insertPoints);
1876            pInsertPoints->size=pInsertPoints->confirmed;
1877            if (_prop == DirProp_S)            /* add RLM before S */
1878            {
1879                addPoint(pBiDi, start0, RLM_BEFORE);
1880                pInsertPoints->confirmed=pInsertPoints->size;
1881            }
1882            break;
1883
1884        case 10:                        /* L after L+ON/AN */
1885            level=pLevState->runLevel + addLevel;
1886            for(k=pLevState->startON; k<start0; k++) {
1887                if (levels[k]<level)
1888                    levels[k]=level;
1889            }
1890            pInsertPoints=&(pBiDi->insertPoints);
1891            pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
1892            pLevState->startON=start0;
1893            break;
1894
1895        case 11:                        /* L after L+ON+EN/AN/ON */
1896            level=pLevState->runLevel;
1897            for(k=start0-1; k>=pLevState->startON; k--) {
1898                if(levels[k]==level+3) {
1899                    while(levels[k]==level+3) {
1900                        levels[k--]-=2;
1901                    }
1902                    while(levels[k]==level) {
1903                        k--;
1904                    }
1905                }
1906                if(levels[k]==level+2) {
1907                    levels[k]=level;
1908                    continue;
1909                }
1910                levels[k]=level+1;
1911            }
1912            break;
1913
1914        case 12:                        /* R after L+ON+EN/AN/ON */
1915            level=pLevState->runLevel+1;
1916            for(k=start0-1; k>=pLevState->startON; k--) {
1917                if(levels[k]>level) {
1918                    levels[k]-=2;
1919                }
1920            }
1921            break;
1922
1923        default:                        /* we should never get here */
1924            U_ASSERT(FALSE);
1925            break;
1926        }
1927    }
1928    if((addLevel) || (start < start0)) {
1929        level=pLevState->runLevel + addLevel;
1930        if(start>=pLevState->runStart) {
1931            for(k=start; k<limit; k++) {
1932                levels[k]=level;
1933            }
1934        } else {
1935            DirProp *dirProps=pBiDi->dirProps, dirProp;
1936            int32_t isolateCount=0;
1937            for(k=start; k<limit; k++) {
1938                dirProp=dirProps[k];
1939                if(dirProp==PDI)
1940                    isolateCount--;
1941                if(isolateCount==0)
1942                    levels[k]=level;
1943                if(dirProp==LRI || dirProp==RLI)
1944                    isolateCount++;
1945            }
1946        }
1947    }
1948}
1949
1950/**
1951 * Returns the directionality of the last strong character at the end of the prologue, if any.
1952 * Requires prologue!=null.
1953 */
1954static DirProp
1955lastL_R_AL(UBiDi *pBiDi) {
1956    const UChar *text=pBiDi->prologue;
1957    int32_t length=pBiDi->proLength;
1958    int32_t i;
1959    UChar32 uchar;
1960    DirProp dirProp;
1961    for(i=length; i>0; ) {
1962        /* i is decremented by U16_PREV */
1963        U16_PREV(text, 0, i, uchar);
1964        dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
1965        if(dirProp==L) {
1966            return DirProp_L;
1967        }
1968        if(dirProp==R || dirProp==AL) {
1969            return DirProp_R;
1970        }
1971        if(dirProp==B) {
1972            return DirProp_ON;
1973        }
1974    }
1975    return DirProp_ON;
1976}
1977
1978/**
1979 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
1980 * Requires epilogue!=null.
1981 */
1982static DirProp
1983firstL_R_AL_EN_AN(UBiDi *pBiDi) {
1984    const UChar *text=pBiDi->epilogue;
1985    int32_t length=pBiDi->epiLength;
1986    int32_t i;
1987    UChar32 uchar;
1988    DirProp dirProp;
1989    for(i=0; i<length; ) {
1990        /* i is incremented by U16_NEXT */
1991        U16_NEXT(text, i, length, uchar);
1992        dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
1993        if(dirProp==L) {
1994            return DirProp_L;
1995        }
1996        if(dirProp==R || dirProp==AL) {
1997            return DirProp_R;
1998        }
1999        if(dirProp==EN) {
2000            return DirProp_EN;
2001        }
2002        if(dirProp==AN) {
2003            return DirProp_AN;
2004        }
2005    }
2006    return DirProp_ON;
2007}
2008
2009static void
2010resolveImplicitLevels(UBiDi *pBiDi,
2011                      int32_t start, int32_t limit,
2012                      DirProp sor, DirProp eor) {
2013    const DirProp *dirProps=pBiDi->dirProps;
2014    DirProp dirProp;
2015    LevState levState;
2016    int32_t i, start1, start2;
2017    uint16_t oldStateImp, stateImp, actionImp;
2018    uint8_t gprop, resProp, cell;
2019    UBool inverseRTL;
2020    DirProp nextStrongProp=R;
2021    int32_t nextStrongPos=-1;
2022
2023    /* check for RTL inverse BiDi mode */
2024    /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2025     * loop on the text characters from end to start.
2026     * This would need a different properties state table (at least different
2027     * actions) and different levels state tables (maybe very similar to the
2028     * LTR corresponding ones.
2029     */
2030    inverseRTL=(UBool)
2031        ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2032         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
2033          pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2034
2035    /* initialize for property and levels state tables */
2036    levState.startON=-1;
2037    levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2038    levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2039    levState.runStart=start;
2040    levState.runLevel=pBiDi->levels[start];
2041    levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2042    levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
2043    if(start==0 && pBiDi->proLength>0) {
2044        DirProp lastStrong=lastL_R_AL(pBiDi);
2045        if(lastStrong!=DirProp_ON) {
2046            sor=lastStrong;
2047        }
2048    }
2049    /* The isolates[] entries contain enough information to
2050       resume the bidi algorithm in the same state as it was
2051       when it was interrupted by an isolate sequence. */
2052    if(dirProps[start]==PDI) {
2053        start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2054        stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2055        levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2056        pBiDi->isolateCount--;
2057    } else {
2058        start1=start;
2059        if(dirProps[start]==NSM)
2060            stateImp = 1 + sor;
2061        else
2062            stateImp=0;
2063        levState.state=0;
2064        processPropertySeq(pBiDi, &levState, sor, start, start);
2065    }
2066    start2=start;
2067
2068    for(i=start; i<=limit; i++) {
2069        if(i>=limit) {
2070            if(limit>start) {
2071                dirProp=pBiDi->dirProps[limit-1];
2072                if(dirProp==LRI || dirProp==RLI)
2073                    break;  /* no forced closing for sequence ending with LRI/RLI */
2074            }
2075            gprop=eor;
2076        } else {
2077            DirProp prop, prop1;
2078            prop=PURE_DIRPROP(dirProps[i]);
2079            if(inverseRTL) {
2080                if(prop==AL) {
2081                    /* AL before EN does not make it AN */
2082                    prop=R;
2083                } else if(prop==EN) {
2084                    if(nextStrongPos<=i) {
2085                        /* look for next strong char (L/R/AL) */
2086                        int32_t j;
2087                        nextStrongProp=R;   /* set default */
2088                        nextStrongPos=limit;
2089                        for(j=i+1; j<limit; j++) {
2090                            prop1=dirProps[j];
2091                            if(prop1==L || prop1==R || prop1==AL) {
2092                                nextStrongProp=prop1;
2093                                nextStrongPos=j;
2094                                break;
2095                            }
2096                        }
2097                    }
2098                    if(nextStrongProp==AL) {
2099                        prop=AN;
2100                    }
2101                }
2102            }
2103            gprop=groupProp[prop];
2104        }
2105        oldStateImp=stateImp;
2106        cell=impTabProps[oldStateImp][gprop];
2107        stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
2108        actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
2109        if((i==limit) && (actionImp==0)) {
2110            /* there is an unprocessed sequence if its property == eor   */
2111            actionImp=1;                    /* process the last sequence */
2112        }
2113        if(actionImp) {
2114            resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2115            switch(actionImp) {
2116            case 1:             /* process current seq1, init new seq1 */
2117                processPropertySeq(pBiDi, &levState, resProp, start1, i);
2118                start1=i;
2119                break;
2120            case 2:             /* init new seq2 */
2121                start2=i;
2122                break;
2123            case 3:             /* process seq1, process seq2, init new seq1 */
2124                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2125                processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2126                start1=i;
2127                break;
2128            case 4:             /* process seq1, set seq1=seq2, init new seq2 */
2129                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2130                start1=start2;
2131                start2=i;
2132                break;
2133            default:            /* we should never get here */
2134                U_ASSERT(FALSE);
2135                break;
2136            }
2137        }
2138    }
2139
2140    /* flush possible pending sequence, e.g. ON */
2141    if(limit==pBiDi->length && pBiDi->epiLength>0) {
2142        DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2143        if(firstStrong!=DirProp_ON) {
2144            eor=firstStrong;
2145        }
2146    }
2147
2148    dirProp=dirProps[limit-1];
2149    if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2150        pBiDi->isolateCount++;
2151        pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2152        pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2153        pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2154    }
2155    else
2156        processPropertySeq(pBiDi, &levState, eor, limit, limit);
2157}
2158
2159/* perform (L1) and (X9) ---------------------------------------------------- */
2160
2161/*
2162 * Reset the embedding levels for some non-graphic characters (L1).
2163 * This function also sets appropriate levels for BN, and
2164 * explicit embedding types that are supposed to have been removed
2165 * from the paragraph in (X9).
2166 */
2167static void
2168adjustWSLevels(UBiDi *pBiDi) {
2169    const DirProp *dirProps=pBiDi->dirProps;
2170    UBiDiLevel *levels=pBiDi->levels;
2171    int32_t i;
2172
2173    if(pBiDi->flags&MASK_WS) {
2174        UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2175        Flags flag;
2176
2177        i=pBiDi->trailingWSStart;
2178        while(i>0) {
2179            /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2180            while(i>0 && (flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])))&MASK_WS) {
2181                if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2182                    levels[i]=0;
2183                } else {
2184                    levels[i]=GET_PARALEVEL(pBiDi, i);
2185                }
2186            }
2187
2188            /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2189            /* here, i+1 is guaranteed to be <length */
2190            while(i>0) {
2191                flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i]));
2192                if(flag&MASK_BN_EXPLICIT) {
2193                    levels[i]=levels[i+1];
2194                } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2195                    levels[i]=0;
2196                    break;
2197                } else if(flag&MASK_B_S) {
2198                    levels[i]=GET_PARALEVEL(pBiDi, i);
2199                    break;
2200                }
2201            }
2202        }
2203    }
2204}
2205
2206U_CAPI void U_EXPORT2
2207ubidi_setContext(UBiDi *pBiDi,
2208                 const UChar *prologue, int32_t proLength,
2209                 const UChar *epilogue, int32_t epiLength,
2210                 UErrorCode *pErrorCode) {
2211    /* check the argument values */
2212    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2213    if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2214       (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2215        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2216        return;
2217    }
2218
2219    if(proLength==-1) {
2220        pBiDi->proLength=u_strlen(prologue);
2221    } else {
2222        pBiDi->proLength=proLength;
2223    }
2224    if(epiLength==-1) {
2225        pBiDi->epiLength=u_strlen(epilogue);
2226    } else {
2227        pBiDi->epiLength=epiLength;
2228    }
2229    pBiDi->prologue=prologue;
2230    pBiDi->epilogue=epilogue;
2231}
2232
2233static void
2234setParaSuccess(UBiDi *pBiDi) {
2235    pBiDi->proLength=0;                 /* forget the last context */
2236    pBiDi->epiLength=0;
2237    pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
2238}
2239
2240#define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
2241#define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
2242
2243static void
2244setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2245                UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
2246    void *runsOnlyMemory;
2247    int32_t *visualMap;
2248    UChar *visualText;
2249    int32_t saveLength, saveTrailingWSStart;
2250    const UBiDiLevel *levels;
2251    UBiDiLevel *saveLevels;
2252    UBiDiDirection saveDirection;
2253    UBool saveMayAllocateText;
2254    Run *runs;
2255    int32_t visualLength, i, j, visualStart, logicalStart,
2256            runCount, runLength, addedRuns, insertRemove,
2257            start, limit, step, indexOddBit, logicalPos,
2258            index0, index1;
2259    uint32_t saveOptions;
2260
2261    pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2262    if(length==0) {
2263        ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2264        goto cleanup3;
2265    }
2266    /* obtain memory for mapping table and visual text */
2267    runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
2268    if(runsOnlyMemory==NULL) {
2269        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2270        goto cleanup3;
2271    }
2272    visualMap=runsOnlyMemory;
2273    visualText=(UChar *)&visualMap[length];
2274    saveLevels=(UBiDiLevel *)&visualText[length];
2275    saveOptions=pBiDi->reorderingOptions;
2276    if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2277        pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2278        pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2279    }
2280    paraLevel&=1;                       /* accept only 0 or 1 */
2281    ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2282    if(U_FAILURE(*pErrorCode)) {
2283        goto cleanup3;
2284    }
2285    /* we cannot access directly pBiDi->levels since it is not yet set if
2286     * direction is not MIXED
2287     */
2288    levels=ubidi_getLevels(pBiDi, pErrorCode);
2289    uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
2290    saveTrailingWSStart=pBiDi->trailingWSStart;
2291    saveLength=pBiDi->length;
2292    saveDirection=pBiDi->direction;
2293
2294    /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2295     * the visual map and the dirProps array to drive the second call
2296     * to ubidi_setPara (but must make provision for possible removal of
2297     * BiDi controls.  Alternatively, only use the dirProps array via
2298     * customized classifier callback.
2299     */
2300    visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2301                                      UBIDI_DO_MIRRORING, pErrorCode);
2302    ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2303    if(U_FAILURE(*pErrorCode)) {
2304        goto cleanup2;
2305    }
2306    pBiDi->reorderingOptions=saveOptions;
2307
2308    pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2309    paraLevel^=1;
2310    /* Because what we did with reorderingOptions, visualText may be shorter
2311     * than the original text. But we don't want the levels memory to be
2312     * reallocated shorter than the original length, since we need to restore
2313     * the levels as after the first call to ubidi_setpara() before returning.
2314     * We will force mayAllocateText to FALSE before the second call to
2315     * ubidi_setpara(), and will restore it afterwards.
2316     */
2317    saveMayAllocateText=pBiDi->mayAllocateText;
2318    pBiDi->mayAllocateText=FALSE;
2319    ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
2320    pBiDi->mayAllocateText=saveMayAllocateText;
2321    ubidi_getRuns(pBiDi, pErrorCode);
2322    if(U_FAILURE(*pErrorCode)) {
2323        goto cleanup1;
2324    }
2325    /* check if some runs must be split, count how many splits */
2326    addedRuns=0;
2327    runCount=pBiDi->runCount;
2328    runs=pBiDi->runs;
2329    visualStart=0;
2330    for(i=0; i<runCount; i++, visualStart+=runLength) {
2331        runLength=runs[i].visualLimit-visualStart;
2332        if(runLength<2) {
2333            continue;
2334        }
2335        logicalStart=GET_INDEX(runs[i].logicalStart);
2336        for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2337            index0=visualMap[j];
2338            index1=visualMap[j-1];
2339            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2340                addedRuns++;
2341            }
2342        }
2343    }
2344    if(addedRuns) {
2345        if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2346            if(runCount==1) {
2347                /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2348                pBiDi->runsMemory[0]=runs[0];
2349            }
2350            runs=pBiDi->runs=pBiDi->runsMemory;
2351            pBiDi->runCount+=addedRuns;
2352        } else {
2353            goto cleanup1;
2354        }
2355    }
2356    /* split runs which are not consecutive in source text */
2357    for(i=runCount-1; i>=0; i--) {
2358        runLength= i==0 ? runs[0].visualLimit :
2359                          runs[i].visualLimit-runs[i-1].visualLimit;
2360        logicalStart=runs[i].logicalStart;
2361        indexOddBit=GET_ODD_BIT(logicalStart);
2362        logicalStart=GET_INDEX(logicalStart);
2363        if(runLength<2) {
2364            if(addedRuns) {
2365                runs[i+addedRuns]=runs[i];
2366            }
2367            logicalPos=visualMap[logicalStart];
2368            runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2369                                            saveLevels[logicalPos]^indexOddBit);
2370            continue;
2371        }
2372        if(indexOddBit) {
2373            start=logicalStart;
2374            limit=logicalStart+runLength-1;
2375            step=1;
2376        } else {
2377            start=logicalStart+runLength-1;
2378            limit=logicalStart;
2379            step=-1;
2380        }
2381        for(j=start; j!=limit; j+=step) {
2382            index0=visualMap[j];
2383            index1=visualMap[j+step];
2384            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2385                logicalPos=BIDI_MIN(visualMap[start], index0);
2386                runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2387                                            saveLevels[logicalPos]^indexOddBit);
2388                runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2389                runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2390                insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2391                runs[i+addedRuns].insertRemove=insertRemove;
2392                runs[i].insertRemove&=~insertRemove;
2393                start=j+step;
2394                addedRuns--;
2395            }
2396        }
2397        if(addedRuns) {
2398            runs[i+addedRuns]=runs[i];
2399        }
2400        logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2401        runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2402                                            saveLevels[logicalPos]^indexOddBit);
2403    }
2404
2405  cleanup1:
2406    /* restore initial paraLevel */
2407    pBiDi->paraLevel^=1;
2408  cleanup2:
2409    /* restore real text */
2410    pBiDi->text=text;
2411    pBiDi->length=saveLength;
2412    pBiDi->originalLength=length;
2413    pBiDi->direction=saveDirection;
2414    /* the saved levels should never excess levelsSize, but we check anyway */
2415    if(saveLength>pBiDi->levelsSize) {
2416        saveLength=pBiDi->levelsSize;
2417    }
2418    uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
2419    pBiDi->trailingWSStart=saveTrailingWSStart;
2420    /* free memory for mapping table and visual text */
2421    uprv_free(runsOnlyMemory);
2422    if(pBiDi->runCount>1) {
2423        pBiDi->direction=UBIDI_MIXED;
2424    }
2425  cleanup3:
2426    pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2427}
2428
2429/* ubidi_setPara ------------------------------------------------------------ */
2430
2431U_CAPI void U_EXPORT2
2432ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2433              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2434              UErrorCode *pErrorCode) {
2435    UBiDiDirection direction;
2436
2437    /* check the argument values */
2438    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2439    if(pBiDi==NULL || text==NULL || length<-1 ||
2440       (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2441        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2442        return;
2443    }
2444
2445    if(length==-1) {
2446        length=u_strlen(text);
2447    }
2448
2449    /* special treatment for RUNS_ONLY mode */
2450    if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2451        setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2452        return;
2453    }
2454
2455    /* initialize the UBiDi structure */
2456    pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
2457    pBiDi->text=text;
2458    pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2459    pBiDi->paraLevel=paraLevel;
2460    pBiDi->direction=paraLevel&1;
2461    pBiDi->paraCount=1;
2462
2463    pBiDi->dirProps=NULL;
2464    pBiDi->levels=NULL;
2465    pBiDi->runs=NULL;
2466    pBiDi->insertPoints.size=0;         /* clean up from last call */
2467    pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
2468
2469    /*
2470     * Save the original paraLevel if contextual; otherwise, set to 0.
2471     */
2472    pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2473
2474    if(length==0) {
2475        /*
2476         * For an empty paragraph, create a UBiDi object with the paraLevel and
2477         * the flags and the direction set but without allocating zero-length arrays.
2478         * There is nothing more to do.
2479         */
2480        if(IS_DEFAULT_LEVEL(paraLevel)) {
2481            pBiDi->paraLevel&=1;
2482            pBiDi->defaultParaLevel=0;
2483        }
2484        pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2485        pBiDi->runCount=0;
2486        pBiDi->paraCount=0;
2487        setParaSuccess(pBiDi);          /* mark successful setPara */
2488        return;
2489    }
2490
2491    pBiDi->runCount=-1;
2492
2493    /* allocate paras memory */
2494    if(pBiDi->parasMemory)
2495        pBiDi->paras=pBiDi->parasMemory;
2496    else
2497        pBiDi->paras=pBiDi->simpleParas;
2498
2499    /*
2500     * Get the directional properties,
2501     * the flags bit-set, and
2502     * determine the paragraph level if necessary.
2503     */
2504    if(getDirPropsMemory(pBiDi, length)) {
2505        pBiDi->dirProps=pBiDi->dirPropsMemory;
2506        if(!getDirProps(pBiDi)) {
2507            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2508            return;
2509        }
2510    } else {
2511        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2512        return;
2513    }
2514    /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2515    length= pBiDi->length;
2516    pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
2517
2518    /* are explicit levels specified? */
2519    if(embeddingLevels==NULL) {
2520        /* no: determine explicit levels according to the (Xn) rules */\
2521        if(getLevelsMemory(pBiDi, length)) {
2522            pBiDi->levels=pBiDi->levelsMemory;
2523            direction=resolveExplicitLevels(pBiDi, pErrorCode);
2524            if(U_FAILURE(*pErrorCode)) {
2525                return;
2526            }
2527        } else {
2528            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2529            return;
2530        }
2531    } else {
2532        /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2533        pBiDi->levels=embeddingLevels;
2534        direction=checkExplicitLevels(pBiDi, pErrorCode);
2535        if(U_FAILURE(*pErrorCode)) {
2536            return;
2537        }
2538    }
2539
2540    /* allocate isolate memory */
2541    if(pBiDi->isolateCount<=SIMPLE_ISOLATES_SIZE)
2542        pBiDi->isolates=pBiDi->simpleIsolates;
2543    else
2544        if(pBiDi->isolateCount<=pBiDi->isolatesSize)
2545            pBiDi->isolates=pBiDi->isolatesMemory;
2546        else {
2547            if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2548                pBiDi->isolates=pBiDi->isolatesMemory;
2549            } else {
2550                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2551                return;
2552            }
2553        }
2554    pBiDi->isolateCount=-1;             /* current isolates stack entry == none */
2555
2556    /*
2557     * The steps after (X9) in the UBiDi algorithm are performed only if
2558     * the paragraph text has mixed directionality!
2559     */
2560    pBiDi->direction=direction;
2561    switch(direction) {
2562    case UBIDI_LTR:
2563        /* make sure paraLevel is even */
2564        pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
2565
2566        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2567        pBiDi->trailingWSStart=0;
2568        break;
2569    case UBIDI_RTL:
2570        /* make sure paraLevel is odd */
2571        pBiDi->paraLevel|=1;
2572
2573        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
2574        pBiDi->trailingWSStart=0;
2575        break;
2576    default:
2577        /*
2578         *  Choose the right implicit state table
2579         */
2580        switch(pBiDi->reorderingMode) {
2581        case UBIDI_REORDER_DEFAULT:
2582            pBiDi->pImpTabPair=&impTab_DEFAULT;
2583            break;
2584        case UBIDI_REORDER_NUMBERS_SPECIAL:
2585            pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
2586            break;
2587        case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
2588            pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
2589            break;
2590        case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
2591            pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
2592            break;
2593        case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
2594            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2595                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
2596            } else {
2597                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
2598            }
2599            break;
2600        case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
2601            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
2602                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
2603            } else {
2604                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
2605            }
2606            break;
2607        default:
2608            /* we should never get here */
2609            U_ASSERT(FALSE);
2610            break;
2611        }
2612        /*
2613         * If there are no external levels specified and there
2614         * are no significant explicit level codes in the text,
2615         * then we can treat the entire paragraph as one run.
2616         * Otherwise, we need to perform the following rules on runs of
2617         * the text with the same embedding levels. (X10)
2618         * "Significant" explicit level codes are ones that actually
2619         * affect non-BN characters.
2620         * Examples for "insignificant" ones are empty embeddings
2621         * LRE-PDF, LRE-RLE-PDF-PDF, etc.
2622         */
2623        if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
2624                                   !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
2625            resolveImplicitLevels(pBiDi, 0, length,
2626                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
2627                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
2628        } else {
2629            /* sor, eor: start and end types of same-level-run */
2630            UBiDiLevel *levels=pBiDi->levels;
2631            int32_t start, limit=0;
2632            UBiDiLevel level, nextLevel;
2633            DirProp sor, eor;
2634
2635            /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2636            level=GET_PARALEVEL(pBiDi, 0);
2637            nextLevel=levels[0];
2638            if(level<nextLevel) {
2639                eor=GET_LR_FROM_LEVEL(nextLevel);
2640            } else {
2641                eor=GET_LR_FROM_LEVEL(level);
2642            }
2643
2644            do {
2645                /* determine start and limit of the run (end points just behind the run) */
2646
2647                /* the values for this run's start are the same as for the previous run's end */
2648                start=limit;
2649                level=nextLevel;
2650                if((start>0) && (pBiDi->dirProps[start-1]==B)) {
2651                    /* except if this is a new paragraph, then set sor = para level */
2652                    sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
2653                } else {
2654                    sor=eor;
2655                }
2656
2657                /* search for the limit of this run */
2658                while(++limit<length && levels[limit]==level) {}
2659
2660                /* get the correct level of the next run */
2661                if(limit<length) {
2662                    nextLevel=levels[limit];
2663                } else {
2664                    nextLevel=GET_PARALEVEL(pBiDi, length-1);
2665                }
2666
2667                /* determine eor from max(level, nextLevel); sor is last run's eor */
2668                if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
2669                    eor=GET_LR_FROM_LEVEL(nextLevel);
2670                } else {
2671                    eor=GET_LR_FROM_LEVEL(level);
2672                }
2673
2674                /* if the run consists of overridden directional types, then there
2675                   are no implicit types to be resolved */
2676                if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2677                    resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2678                } else {
2679                    /* remove the UBIDI_LEVEL_OVERRIDE flags */
2680                    do {
2681                        levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2682                    } while(start<limit);
2683                }
2684            } while(limit<length);
2685        }
2686        /* check if we got any memory shortage while adding insert points */
2687        if (U_FAILURE(pBiDi->insertPoints.errorCode))
2688        {
2689            *pErrorCode=pBiDi->insertPoints.errorCode;
2690            return;
2691        }
2692        /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2693        adjustWSLevels(pBiDi);
2694        break;
2695    }
2696    /* add RLM for inverse Bidi with contextual orientation resolving
2697     * to RTL which would not round-trip otherwise
2698     */
2699    if((pBiDi->defaultParaLevel>0) &&
2700       (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2701       ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2702        (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2703        int32_t i, j, start, last;
2704        UBiDiLevel level;
2705        DirProp dirProp;
2706        for(i=0; i<pBiDi->paraCount; i++) {
2707            last=(pBiDi->paras[i].limit)-1;
2708            level=pBiDi->paras[i].level;
2709            if(level==0)
2710                continue;           /* LTR paragraph */
2711            start= i==0 ? 0 : pBiDi->paras[i-1].limit;
2712            for(j=last; j>=start; j--) {
2713                dirProp=pBiDi->dirProps[j];
2714                if(dirProp==L) {
2715                    if(j<last) {
2716                        while(pBiDi->dirProps[last]==B) {
2717                            last--;
2718                        }
2719                    }
2720                    addPoint(pBiDi, last, RLM_BEFORE);
2721                    break;
2722                }
2723                if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2724                    break;
2725                }
2726            }
2727        }
2728    }
2729
2730    if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2731        pBiDi->resultLength -= pBiDi->controlCount;
2732    } else {
2733        pBiDi->resultLength += pBiDi->insertPoints.size;
2734    }
2735    setParaSuccess(pBiDi);              /* mark successful setPara */
2736}
2737
2738U_CAPI void U_EXPORT2
2739ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2740    if(pBiDi!=NULL) {
2741        pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2742    }
2743}
2744
2745U_CAPI UBool U_EXPORT2
2746ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2747    if(pBiDi!=NULL) {
2748        return pBiDi->orderParagraphsLTR;
2749    } else {
2750        return FALSE;
2751    }
2752}
2753
2754U_CAPI UBiDiDirection U_EXPORT2
2755ubidi_getDirection(const UBiDi *pBiDi) {
2756    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2757        return pBiDi->direction;
2758    } else {
2759        return UBIDI_LTR;
2760    }
2761}
2762
2763U_CAPI const UChar * U_EXPORT2
2764ubidi_getText(const UBiDi *pBiDi) {
2765    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2766        return pBiDi->text;
2767    } else {
2768        return NULL;
2769    }
2770}
2771
2772U_CAPI int32_t U_EXPORT2
2773ubidi_getLength(const UBiDi *pBiDi) {
2774    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2775        return pBiDi->originalLength;
2776    } else {
2777        return 0;
2778    }
2779}
2780
2781U_CAPI int32_t U_EXPORT2
2782ubidi_getProcessedLength(const UBiDi *pBiDi) {
2783    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2784        return pBiDi->length;
2785    } else {
2786        return 0;
2787    }
2788}
2789
2790U_CAPI int32_t U_EXPORT2
2791ubidi_getResultLength(const UBiDi *pBiDi) {
2792    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2793        return pBiDi->resultLength;
2794    } else {
2795        return 0;
2796    }
2797}
2798
2799/* paragraphs API functions ------------------------------------------------- */
2800
2801U_CAPI UBiDiLevel U_EXPORT2
2802ubidi_getParaLevel(const UBiDi *pBiDi) {
2803    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2804        return pBiDi->paraLevel;
2805    } else {
2806        return 0;
2807    }
2808}
2809
2810U_CAPI int32_t U_EXPORT2
2811ubidi_countParagraphs(UBiDi *pBiDi) {
2812    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2813        return 0;
2814    } else {
2815        return pBiDi->paraCount;
2816    }
2817}
2818
2819U_CAPI void U_EXPORT2
2820ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2821                          int32_t *pParaStart, int32_t *pParaLimit,
2822                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2823    int32_t paraStart;
2824
2825    /* check the argument values */
2826    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2827    RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2828    RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2829
2830    pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2831    if(paraIndex) {
2832        paraStart=pBiDi->paras[paraIndex-1].limit;
2833    } else {
2834        paraStart=0;
2835    }
2836    if(pParaStart!=NULL) {
2837        *pParaStart=paraStart;
2838    }
2839    if(pParaLimit!=NULL) {
2840        *pParaLimit=pBiDi->paras[paraIndex].limit;
2841    }
2842    if(pParaLevel!=NULL) {
2843        *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2844    }
2845}
2846
2847U_CAPI int32_t U_EXPORT2
2848ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2849                          int32_t *pParaStart, int32_t *pParaLimit,
2850                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2851    int32_t paraIndex;
2852
2853    /* check the argument values */
2854    /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2855    RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2856    RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2857    pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2858    RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2859
2860    for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
2861    ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2862    return paraIndex;
2863}
2864
2865U_CAPI void U_EXPORT2
2866ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2867                       const void *newContext, UBiDiClassCallback **oldFn,
2868                       const void **oldContext, UErrorCode *pErrorCode)
2869{
2870    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2871    if(pBiDi==NULL) {
2872        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2873        return;
2874    }
2875    if( oldFn )
2876    {
2877        *oldFn = pBiDi->fnClassCallback;
2878    }
2879    if( oldContext )
2880    {
2881        *oldContext = pBiDi->coClassCallback;
2882    }
2883    pBiDi->fnClassCallback = newFn;
2884    pBiDi->coClassCallback = newContext;
2885}
2886
2887U_CAPI void U_EXPORT2
2888ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
2889{
2890    if(pBiDi==NULL) {
2891        return;
2892    }
2893    if( fn )
2894    {
2895        *fn = pBiDi->fnClassCallback;
2896    }
2897    if( context )
2898    {
2899        *context = pBiDi->coClassCallback;
2900    }
2901}
2902
2903U_CAPI UCharDirection U_EXPORT2
2904ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
2905{
2906    UCharDirection dir;
2907
2908    if( pBiDi->fnClassCallback == NULL ||
2909        (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
2910    {
2911        dir = ubidi_getClass(pBiDi->bdp, c);
2912    }
2913    if(dir >= U_CHAR_DIRECTION_COUNT) {
2914        dir = ON;
2915    }
2916    return dir;
2917}
2918