1/*
2******************************************************************************
3*
4*   Copyright (C) 1999-2010, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  ubidi.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 1999jul27
14*   created by: Markus W. Scherer, updated by Matitiahu Allouche
15*/
16
17#include "cmemory.h"
18#include "unicode/utypes.h"
19#include "unicode/ustring.h"
20#include "unicode/uchar.h"
21#include "unicode/ubidi.h"
22#include "ubidi_props.h"
23#include "ubidiimp.h"
24#include "uassert.h"
25
26/*
27 * General implementation notes:
28 *
29 * Throughout the implementation, there are comments like (W2) that refer to
30 * rules of the BiDi algorithm in its version 5, in this example to the second
31 * rule of the resolution of weak types.
32 *
33 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
34 * character according to UTF-16, the second UChar gets the directional property of
35 * the entire character assigned, while the first one gets a BN, a boundary
36 * neutral, type, which is ignored by most of the algorithm according to
37 * rule (X9) and the implementation suggestions of the BiDi algorithm.
38 *
39 * Later, adjustWSLevels() will set the level for each BN to that of the
40 * following character (UChar), which results in surrogate pairs getting the
41 * same level on each of their surrogates.
42 *
43 * In a UTF-8 implementation, the same thing could be done: the last byte of
44 * a multi-byte sequence would get the "real" property, while all previous
45 * bytes of that sequence would get BN.
46 *
47 * It is not possible to assign all those parts of a character the same real
48 * property because this would fail in the resolution of weak types with rules
49 * that look at immediately surrounding types.
50 *
51 * As a related topic, this implementation does not remove Boundary Neutral
52 * types from the input, but ignores them wherever this is relevant.
53 * For example, the loop for the resolution of the weak types reads
54 * types until it finds a non-BN.
55 * Also, explicit embedding codes are neither changed into BN nor removed.
56 * They are only treated the same way real BNs are.
57 * As stated before, adjustWSLevels() takes care of them at the end.
58 * For the purpose of conformance, the levels of all these codes
59 * do not matter.
60 *
61 * Note that this implementation never modifies the dirProps
62 * after the initial setup.
63 *
64 *
65 * In this implementation, the resolution of weak types (Wn),
66 * neutrals (Nn), and the assignment of the resolved level (In)
67 * are all done in one single loop, in resolveImplicitLevels().
68 * Changes of dirProp values are done on the fly, without writing
69 * them back to the dirProps array.
70 *
71 *
72 * This implementation contains code that allows to bypass steps of the
73 * algorithm that are not needed on the specific paragraph
74 * in order to speed up the most common cases considerably,
75 * like text that is entirely LTR, or RTL text without numbers.
76 *
77 * Most of this is done by setting a bit for each directional property
78 * in a flags variable and later checking for whether there are
79 * any LTR characters or any RTL characters, or both, whether
80 * there are any explicit embedding codes, etc.
81 *
82 * If the (Xn) steps are performed, then the flags are re-evaluated,
83 * because they will then not contain the embedding codes any more
84 * and will be adjusted for override codes, so that subsequently
85 * more bypassing may be possible than what the initial flags suggested.
86 *
87 * If the text is not mixed-directional, then the
88 * algorithm steps for the weak type resolution are not performed,
89 * and all levels are set to the paragraph level.
90 *
91 * If there are no explicit embedding codes, then the (Xn) steps
92 * are not performed.
93 *
94 * If embedding levels are supplied as a parameter, then all
95 * explicit embedding codes are ignored, and the (Xn) steps
96 * are not performed.
97 *
98 * White Space types could get the level of the run they belong to,
99 * and are checked with a test of (flags&MASK_EMBEDDING) to
100 * consider if the paragraph direction should be considered in
101 * the flags variable.
102 *
103 * If there are no White Space types in the paragraph, then
104 * (L1) is not necessary in adjustWSLevels().
105 */
106
107/* to avoid some conditional statements, use tiny constant arrays */
108static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
109static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
110static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
111
112#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
113#define DIRPROP_FLAG_E(level) flagE[(level)&1]
114#define DIRPROP_FLAG_O(level) flagO[(level)&1]
115
116/* UBiDi object management -------------------------------------------------- */
117
118U_CAPI UBiDi * U_EXPORT2
119ubidi_open(void)
120{
121    UErrorCode errorCode=U_ZERO_ERROR;
122    return ubidi_openSized(0, 0, &errorCode);
123}
124
125U_CAPI UBiDi * U_EXPORT2
126ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
127    UBiDi *pBiDi;
128
129    /* check the argument values */
130    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
131        return NULL;
132    } else if(maxLength<0 || maxRunCount<0) {
133        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
134        return NULL;    /* invalid arguments */
135    }
136
137    /* allocate memory for the object */
138    pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
139    if(pBiDi==NULL) {
140        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
141        return NULL;
142    }
143
144    /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
145    uprv_memset(pBiDi, 0, sizeof(UBiDi));
146
147    /* get BiDi properties */
148    pBiDi->bdp=ubidi_getSingleton();
149
150    /* allocate memory for arrays as requested */
151    if(maxLength>0) {
152        if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
153            !getInitialLevelsMemory(pBiDi, maxLength)
154        ) {
155            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
156        }
157    } else {
158        pBiDi->mayAllocateText=TRUE;
159    }
160
161    if(maxRunCount>0) {
162        if(maxRunCount==1) {
163            /* use simpleRuns[] */
164            pBiDi->runsSize=sizeof(Run);
165        } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
166            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
167        }
168    } else {
169        pBiDi->mayAllocateRuns=TRUE;
170    }
171
172    if(U_SUCCESS(*pErrorCode)) {
173        return pBiDi;
174    } else {
175        ubidi_close(pBiDi);
176        return NULL;
177    }
178}
179
180/*
181 * We are allowed to allocate memory if memory==NULL or
182 * mayAllocate==TRUE for each array that we need.
183 * We also try to grow memory as needed if we
184 * allocate it.
185 *
186 * Assume sizeNeeded>0.
187 * If *pMemory!=NULL, then assume *pSize>0.
188 *
189 * ### this realloc() may unnecessarily copy the old data,
190 * which we know we don't need any more;
191 * is this the best way to do this??
192 */
193U_CFUNC UBool
194ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
195    void **pMemory = (void **)bidiMem;
196    /* check for existing memory */
197    if(*pMemory==NULL) {
198        /* we need to allocate memory */
199        if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
200            *pSize=sizeNeeded;
201            return TRUE;
202        } else {
203            return FALSE;
204        }
205    } else {
206        if(sizeNeeded<=*pSize) {
207            /* there is already enough memory */
208            return TRUE;
209        }
210        else if(!mayAllocate) {
211            /* not enough memory, and we must not allocate */
212            return FALSE;
213        } else {
214            /* we try to grow */
215            void *memory;
216            /* in most cases, we do not need the copy-old-data part of
217             * realloc, but it is needed when adding runs using getRunsMemory()
218             * in setParaRunsOnly()
219             */
220            if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
221                *pMemory=memory;
222                *pSize=sizeNeeded;
223                return TRUE;
224            } else {
225                /* we failed to grow */
226                return FALSE;
227            }
228        }
229    }
230}
231
232U_CAPI void U_EXPORT2
233ubidi_close(UBiDi *pBiDi) {
234    if(pBiDi!=NULL) {
235        pBiDi->pParaBiDi=NULL;          /* in case one tries to reuse this block */
236        if(pBiDi->dirPropsMemory!=NULL) {
237            uprv_free(pBiDi->dirPropsMemory);
238        }
239        if(pBiDi->levelsMemory!=NULL) {
240            uprv_free(pBiDi->levelsMemory);
241        }
242        if(pBiDi->runsMemory!=NULL) {
243            uprv_free(pBiDi->runsMemory);
244        }
245        if(pBiDi->parasMemory!=NULL) {
246            uprv_free(pBiDi->parasMemory);
247        }
248        if(pBiDi->insertPoints.points!=NULL) {
249            uprv_free(pBiDi->insertPoints.points);
250        }
251
252        uprv_free(pBiDi);
253    }
254}
255
256/* set to approximate "inverse BiDi" ---------------------------------------- */
257
258U_CAPI void U_EXPORT2
259ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
260    if(pBiDi!=NULL) {
261        pBiDi->isInverse=isInverse;
262        pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
263                                          : UBIDI_REORDER_DEFAULT;
264    }
265}
266
267U_CAPI UBool U_EXPORT2
268ubidi_isInverse(UBiDi *pBiDi) {
269    if(pBiDi!=NULL) {
270        return pBiDi->isInverse;
271    } else {
272        return FALSE;
273    }
274}
275
276/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
277 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
278 * concept of RUNS_ONLY which is a double operation.
279 * It could be advantageous to divide this into 3 concepts:
280 * a) Operation: direct / inverse / RUNS_ONLY
281 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
282 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
283 * This would allow combinations not possible today like RUNS_ONLY with
284 * NUMBERS_SPECIAL.
285 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
286 * REMOVE_CONTROLS for the inverse step.
287 * Not all combinations would be supported, and probably not all do make sense.
288 * This would need to document which ones are supported and what are the
289 * fallbacks for unsupported combinations.
290 */
291U_CAPI void U_EXPORT2
292ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
293    if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
294                        && (reorderingMode < UBIDI_REORDER_COUNT)) {
295        pBiDi->reorderingMode = reorderingMode;
296        pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
297    }
298}
299
300U_CAPI UBiDiReorderingMode U_EXPORT2
301ubidi_getReorderingMode(UBiDi *pBiDi) {
302    if (pBiDi!=NULL) {
303        return pBiDi->reorderingMode;
304    } else {
305        return UBIDI_REORDER_DEFAULT;
306    }
307}
308
309U_CAPI void U_EXPORT2
310ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
311    if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
312        reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
313    }
314    if (pBiDi!=NULL) {
315        pBiDi->reorderingOptions=reorderingOptions;
316    }
317}
318
319U_CAPI uint32_t U_EXPORT2
320ubidi_getReorderingOptions(UBiDi *pBiDi) {
321    if (pBiDi!=NULL) {
322        return pBiDi->reorderingOptions;
323    } else {
324        return 0;
325    }
326}
327
328U_CAPI UBiDiDirection U_EXPORT2
329ubidi_getBaseDirection(const UChar *text,
330int32_t length){
331
332    int32_t i;
333    UChar32 uchar;
334    UCharDirection dir;
335
336    if( text==NULL || length<-1 ){
337        return UBIDI_NEUTRAL;
338    }
339
340    if(length==-1) {
341        length=u_strlen(text);
342    }
343
344    for( i = 0 ; i < length; ) {
345        /* i is incremented by U16_NEXT */
346        U16_NEXT(text, i, length, uchar);
347        dir = u_charDirection(uchar);
348        if( dir == U_LEFT_TO_RIGHT )
349                return UBIDI_LTR;
350        if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
351                return UBIDI_RTL;
352    }
353    return UBIDI_NEUTRAL;
354}
355
356/* perform (P2)..(P3) ------------------------------------------------------- */
357
358/*
359 * Get the directional properties for the text,
360 * calculate the flags bit-set, and
361 * determine the paragraph level if necessary.
362 */
363static void
364getDirProps(UBiDi *pBiDi) {
365    const UChar *text=pBiDi->text;
366    DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
367
368    int32_t i=0, i1, length=pBiDi->originalLength;
369    Flags flags=0;      /* collect all directionalities in the text */
370    UChar32 uchar;
371    DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */
372    UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
373    /* for inverse BiDi, the default para level is set to RTL if there is a
374       strong R or AL character at either end of the text                           */
375    UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
376            (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
377             pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
378    int32_t lastArabicPos=-1;
379    int32_t controlCount=0;
380    UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
381                                       UBIDI_OPTION_REMOVE_CONTROLS);
382
383    typedef enum {
384         NOT_CONTEXTUAL,                /* 0: not contextual paraLevel */
385         LOOKING_FOR_STRONG,            /* 1: looking for first strong char */
386         FOUND_STRONG_CHAR              /* 2: found first strong char       */
387    } State;
388    State state;
389    int32_t paraStart=0;                /* index of first char in paragraph */
390    DirProp paraDir;                    /* == CONTEXT_RTL within paragraphs
391                                           starting with strong R char      */
392    DirProp lastStrongDir=0;            /* for default level & inverse BiDi */
393    int32_t lastStrongLTR=0;            /* for STREAMING option             */
394
395    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
396        pBiDi->length=0;
397        lastStrongLTR=0;
398    }
399    if(isDefaultLevel) {
400        paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0;
401        paraDir=paraDirDefault;
402        lastStrongDir=paraDirDefault;
403        state=LOOKING_FOR_STRONG;
404    } else {
405        state=NOT_CONTEXTUAL;
406        paraDir=0;
407    }
408    /* count paragraphs and determine the paragraph level (P2..P3) */
409    /*
410     * see comment in ubidi.h:
411     * the DEFAULT_XXX values are designed so that
412     * their bit 0 alone yields the intended default
413     */
414    for( /* i=0 above */ ; i<length; ) {
415        /* i is incremented by U16_NEXT */
416        U16_NEXT(text, i, length, uchar);
417        flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
418        dirProps[i-1]=dirProp|paraDir;
419        if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
420            flags|=DIRPROP_FLAG(BN);
421            dirProps[i-2]=(DirProp)(BN|paraDir);
422        }
423        if(state==LOOKING_FOR_STRONG) {
424            if(dirProp==L) {
425                state=FOUND_STRONG_CHAR;
426                if(paraDir) {
427                    paraDir=0;
428                    for(i1=paraStart; i1<i; i1++) {
429                        dirProps[i1]&=~CONTEXT_RTL;
430                    }
431                }
432                continue;
433            }
434            if(dirProp==R || dirProp==AL) {
435                state=FOUND_STRONG_CHAR;
436                if(paraDir==0) {
437                    paraDir=CONTEXT_RTL;
438                    for(i1=paraStart; i1<i; i1++) {
439                        dirProps[i1]|=CONTEXT_RTL;
440                    }
441                }
442                continue;
443            }
444        }
445        if(dirProp==L) {
446            lastStrongDir=0;
447            lastStrongLTR=i;            /* i is index to next character */
448        }
449        else if(dirProp==R) {
450            lastStrongDir=CONTEXT_RTL;
451        }
452        else if(dirProp==AL) {
453            lastStrongDir=CONTEXT_RTL;
454            lastArabicPos=i-1;
455        }
456        else if(dirProp==B) {
457            if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
458                pBiDi->length=i;        /* i is index to next character */
459            }
460            if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
461                for( ; paraStart<i; paraStart++) {
462                    dirProps[paraStart]|=CONTEXT_RTL;
463                }
464            }
465            if(i<length) {              /* B not last char in text */
466                if(!((uchar==CR) && (text[i]==LF))) {
467                    pBiDi->paraCount++;
468                }
469                if(isDefaultLevel) {
470                    state=LOOKING_FOR_STRONG;
471                    paraStart=i;        /* i is index to next character */
472                    paraDir=paraDirDefault;
473                    lastStrongDir=paraDirDefault;
474                }
475            }
476        }
477        if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) {
478            controlCount++;
479        }
480    }
481    if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
482        for(i1=paraStart; i1<length; i1++) {
483            dirProps[i1]|=CONTEXT_RTL;
484        }
485    }
486    if(isDefaultLevel) {
487        pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0);
488    }
489    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
490        if((lastStrongLTR>pBiDi->length) &&
491           (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) {
492            pBiDi->length = lastStrongLTR;
493        }
494        if(pBiDi->length<pBiDi->originalLength) {
495            pBiDi->paraCount--;
496        }
497    }
498    /* The following line does nothing new for contextual paraLevel, but is
499       needed for absolute paraLevel.                               */
500    flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
501
502    if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
503        flags|=DIRPROP_FLAG(L);
504    }
505
506    pBiDi->controlCount = controlCount;
507    pBiDi->flags=flags;
508    pBiDi->lastArabicPos=lastArabicPos;
509}
510
511/* perform (X1)..(X9) ------------------------------------------------------- */
512
513/* determine if the text is mixed-directional or single-directional */
514static UBiDiDirection
515directionFromFlags(UBiDi *pBiDi) {
516    Flags flags=pBiDi->flags;
517    /* if the text contains AN and neutrals, then some neutrals may become RTL */
518    if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
519        return UBIDI_LTR;
520    } else if(!(flags&MASK_LTR)) {
521        return UBIDI_RTL;
522    } else {
523        return UBIDI_MIXED;
524    }
525}
526
527/*
528 * Resolve the explicit levels as specified by explicit embedding codes.
529 * Recalculate the flags to have them reflect the real properties
530 * after taking the explicit embeddings into account.
531 *
532 * The BiDi algorithm is designed to result in the same behavior whether embedding
533 * levels are externally specified (from "styled text", supposedly the preferred
534 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
535 * That is why (X9) instructs to remove all explicit codes (and BN).
536 * However, in a real implementation, this removal of these codes and their index
537 * positions in the plain text is undesirable since it would result in
538 * reallocated, reindexed text.
539 * Instead, this implementation leaves the codes in there and just ignores them
540 * in the subsequent processing.
541 * In order to get the same reordering behavior, positions with a BN or an
542 * explicit embedding code just get the same level assigned as the last "real"
543 * character.
544 *
545 * Some implementations, not this one, then overwrite some of these
546 * directionality properties at "real" same-level-run boundaries by
547 * L or R codes so that the resolution of weak types can be performed on the
548 * entire paragraph at once instead of having to parse it once more and
549 * perform that resolution on same-level-runs.
550 * This limits the scope of the implicit rules in effectively
551 * the same way as the run limits.
552 *
553 * Instead, this implementation does not modify these codes.
554 * On one hand, the paragraph has to be scanned for same-level-runs, but
555 * on the other hand, this saves another loop to reset these codes,
556 * or saves making and modifying a copy of dirProps[].
557 *
558 *
559 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
560 *
561 *
562 * Handling the stack of explicit levels (Xn):
563 *
564 * With the BiDi stack of explicit levels,
565 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
566 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
567 *
568 * In order to have a correct push-pop semantics even in the case of overflows,
569 * there are two overflow counters:
570 * - countOver60 is incremented with each LRx at level 60
571 * - from level 60, one RLx increases the level to 61
572 * - countOver61 is incremented with each LRx and RLx at level 61
573 *
574 * Popping levels with PDF must work in the opposite order so that level 61
575 * is correct at the correct point. Underflows (too many PDFs) must be checked.
576 *
577 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
578 */
579static UBiDiDirection
580resolveExplicitLevels(UBiDi *pBiDi) {
581    const DirProp *dirProps=pBiDi->dirProps;
582    UBiDiLevel *levels=pBiDi->levels;
583    const UChar *text=pBiDi->text;
584
585    int32_t i=0, length=pBiDi->length;
586    Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
587    DirProp dirProp;
588    UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
589
590    UBiDiDirection direction;
591    int32_t paraIndex=0;
592
593    /* determine if the text is mixed-directional or single-directional */
594    direction=directionFromFlags(pBiDi);
595
596    /* we may not need to resolve any explicit levels, but for multiple
597       paragraphs we want to loop on all chars to set the para boundaries */
598    if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) {
599        /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
600    } else if((pBiDi->paraCount==1) &&
601              (!(flags&MASK_EXPLICIT) ||
602               (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) {
603        /* mixed, but all characters are at the same embedding level */
604        /* or we are in "inverse BiDi" */
605        /* and we don't have contextual multiple paragraphs with some B char */
606        /* set all levels to the paragraph level */
607        for(i=0; i<length; ++i) {
608            levels[i]=level;
609        }
610    } else {
611        /* continue to perform (Xn) */
612
613        /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
614        /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
615        UBiDiLevel embeddingLevel=level, newLevel, stackTop=0;
616
617        UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL];        /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
618        uint32_t countOver60=0, countOver61=0;  /* count overflows of explicit levels */
619
620        /* recalculate the flags */
621        flags=0;
622
623        for(i=0; i<length; ++i) {
624            dirProp=NO_CONTEXT_RTL(dirProps[i]);
625            switch(dirProp) {
626            case LRE:
627            case LRO:
628                /* (X3, X5) */
629                newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
630                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
631                    stack[stackTop]=embeddingLevel;
632                    ++stackTop;
633                    embeddingLevel=newLevel;
634                    if(dirProp==LRO) {
635                        embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
636                    }
637                    /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE
638                       since this has already been done for newLevel which is
639                       the source for embeddingLevel.
640                     */
641                } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) {
642                    ++countOver61;
643                } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
644                    ++countOver60;
645                }
646                flags|=DIRPROP_FLAG(BN);
647                break;
648            case RLE:
649            case RLO:
650                /* (X2, X4) */
651                newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
652                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
653                    stack[stackTop]=embeddingLevel;
654                    ++stackTop;
655                    embeddingLevel=newLevel;
656                    if(dirProp==RLO) {
657                        embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
658                    }
659                    /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE
660                       since this has already been done for newLevel which is
661                       the source for embeddingLevel.
662                     */
663                } else {
664                    ++countOver61;
665                }
666                flags|=DIRPROP_FLAG(BN);
667                break;
668            case PDF:
669                /* (X7) */
670                /* handle all the overflow cases first */
671                if(countOver61>0) {
672                    --countOver61;
673                } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) {
674                    /* handle LRx overflows from level 60 */
675                    --countOver60;
676                } else if(stackTop>0) {
677                    /* this is the pop operation; it also pops level 61 while countOver60>0 */
678                    --stackTop;
679                    embeddingLevel=stack[stackTop];
680                /* } else { (underflow) */
681                }
682                flags|=DIRPROP_FLAG(BN);
683                break;
684            case B:
685                stackTop=0;
686                countOver60=countOver61=0;
687                level=GET_PARALEVEL(pBiDi, i);
688                if((i+1)<length) {
689                    embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
690                    if(!((text[i]==CR) && (text[i+1]==LF))) {
691                        pBiDi->paras[paraIndex++]=i+1;
692                    }
693                }
694                flags|=DIRPROP_FLAG(B);
695                break;
696            case BN:
697                /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
698                /* they will get their levels set correctly in adjustWSLevels() */
699                flags|=DIRPROP_FLAG(BN);
700                break;
701            default:
702                /* all other types get the "real" level */
703                if(level!=embeddingLevel) {
704                    level=embeddingLevel;
705                    if(level&UBIDI_LEVEL_OVERRIDE) {
706                        flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
707                    } else {
708                        flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
709                    }
710                }
711                if(!(level&UBIDI_LEVEL_OVERRIDE)) {
712                    flags|=DIRPROP_FLAG(dirProp);
713                }
714                break;
715            }
716
717            /*
718             * We need to set reasonable levels even on BN codes and
719             * explicit codes because we will later look at same-level runs (X10).
720             */
721            levels[i]=level;
722        }
723        if(flags&MASK_EMBEDDING) {
724            flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
725        }
726        if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
727            flags|=DIRPROP_FLAG(L);
728        }
729
730        /* subsequently, ignore the explicit codes and BN (X9) */
731
732        /* again, determine if the text is mixed-directional or single-directional */
733        pBiDi->flags=flags;
734        direction=directionFromFlags(pBiDi);
735    }
736
737    return direction;
738}
739
740/*
741 * Use a pre-specified embedding levels array:
742 *
743 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
744 * ignore all explicit codes (X9),
745 * and check all the preset levels.
746 *
747 * Recalculate the flags to have them reflect the real properties
748 * after taking the explicit embeddings into account.
749 */
750static UBiDiDirection
751checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
752    const DirProp *dirProps=pBiDi->dirProps;
753    DirProp dirProp;
754    UBiDiLevel *levels=pBiDi->levels;
755    const UChar *text=pBiDi->text;
756
757    int32_t i, length=pBiDi->length;
758    Flags flags=0;  /* collect all directionalities in the text */
759    UBiDiLevel level;
760    uint32_t paraIndex=0;
761
762    for(i=0; i<length; ++i) {
763        level=levels[i];
764        dirProp=NO_CONTEXT_RTL(dirProps[i]);
765        if(level&UBIDI_LEVEL_OVERRIDE) {
766            /* keep the override flag in levels[i] but adjust the flags */
767            level&=~UBIDI_LEVEL_OVERRIDE;     /* make the range check below simpler */
768            flags|=DIRPROP_FLAG_O(level);
769        } else {
770            /* set the flags */
771            flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
772        }
773        if((level<GET_PARALEVEL(pBiDi, i) &&
774            !((0==level)&&(dirProp==B))) ||
775           (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
776            /* level out of bounds */
777            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
778            return UBIDI_LTR;
779        }
780        if((dirProp==B) && ((i+1)<length)) {
781            if(!((text[i]==CR) && (text[i+1]==LF))) {
782                pBiDi->paras[paraIndex++]=i+1;
783            }
784        }
785    }
786    if(flags&MASK_EMBEDDING) {
787        flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
788    }
789
790    /* determine if the text is mixed-directional or single-directional */
791    pBiDi->flags=flags;
792    return directionFromFlags(pBiDi);
793}
794
795/******************************************************************
796 The Properties state machine table
797*******************************************************************
798
799 All table cells are 8 bits:
800      bits 0..4:  next state
801      bits 5..7:  action to perform (if > 0)
802
803 Cells may be of format "n" where n represents the next state
804 (except for the rightmost column).
805 Cells may also be of format "s(x,y)" where x represents an action
806 to perform and y represents the next state.
807
808*******************************************************************
809 Definitions and type for properties state table
810*******************************************************************
811*/
812#define IMPTABPROPS_COLUMNS 14
813#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
814#define GET_STATEPROPS(cell) ((cell)&0x1f)
815#define GET_ACTIONPROPS(cell) ((cell)>>5)
816#define s(action, newState) ((uint8_t)(newState+(action<<5)))
817
818static const uint8_t groupProp[] =          /* dirProp regrouped */
819{
820/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  */
821    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10
822};
823enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
824
825/******************************************************************
826
827      PROPERTIES  STATE  TABLE
828
829 In table impTabProps,
830      - the ON column regroups ON and WS
831      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
832      - the Res column is the reduced property assigned to a run
833
834 Action 1: process current run1, init new run1
835        2: init new run2
836        3: process run1, process run2, init new run1
837        4: process run1, set run1=run2, init new run2
838
839 Notes:
840  1) This table is used in resolveImplicitLevels().
841  2) This table triggers actions when there is a change in the Bidi
842     property of incoming characters (action 1).
843  3) Most such property sequences are processed immediately (in
844     fact, passed to processPropertySeq().
845  4) However, numbers are assembled as one sequence. This means
846     that undefined situations (like CS following digits, until
847     it is known if the next char will be a digit) are held until
848     following chars define them.
849     Example: digits followed by CS, then comes another CS or ON;
850              the digits will be processed, then the CS assigned
851              as the start of an ON sequence (action 3).
852  5) There are cases where more than one sequence must be
853     processed, for instance digits followed by CS followed by L:
854     the digits must be processed as one sequence, and the CS
855     must be processed as an ON sequence, all this before starting
856     assembling chars for the opening L sequence.
857
858
859*/
860static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
861{
862/*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,  Res */
863/* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,  DirProp_ON },
864/* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),   DirProp_L },
865/* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),   DirProp_R },
866/* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,   DirProp_R },
867/* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),  DirProp_EN },
868/* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),  DirProp_AN },
869/* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),  DirProp_AN },
870/* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),  DirProp_ON },
871/* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),  DirProp_ON },
872/* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),  DirProp_ON },
873/*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),  DirProp_EN },
874/*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),  DirProp_EN },
875/*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),  DirProp_AN },
876/*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),  DirProp_AN },
877/*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),  DirProp_ON },
878/*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),   DirProp_S },
879/*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),   DirProp_S },
880/*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),   DirProp_B }
881};
882
883/*  we must undef macro s because the levels table have a different
884 *  structure (4 bits for action and 4 bits for next state.
885 */
886#undef s
887
888/******************************************************************
889 The levels state machine tables
890*******************************************************************
891
892 All table cells are 8 bits:
893      bits 0..3:  next state
894      bits 4..7:  action to perform (if > 0)
895
896 Cells may be of format "n" where n represents the next state
897 (except for the rightmost column).
898 Cells may also be of format "s(x,y)" where x represents an action
899 to perform and y represents the next state.
900
901 This format limits each table to 16 states each and to 15 actions.
902
903*******************************************************************
904 Definitions and type for levels state tables
905*******************************************************************
906*/
907#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
908#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
909#define GET_STATE(cell) ((cell)&0x0f)
910#define GET_ACTION(cell) ((cell)>>4)
911#define s(action, newState) ((uint8_t)(newState+(action<<4)))
912
913typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
914typedef uint8_t ImpAct[];
915
916/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
917 * instead of having a pair of ImpTab and a pair of ImpAct.
918 */
919typedef struct ImpTabPair {
920    const void * pImpTab[2];
921    const void * pImpAct[2];
922} ImpTabPair;
923
924/******************************************************************
925
926      LEVELS  STATE  TABLES
927
928 In all levels state tables,
929      - state 0 is the initial state
930      - the Res column is the increment to add to the text level
931        for this property sequence.
932
933 The impAct arrays for each table of a pair map the local action
934 numbers of the table to the total list of actions. For instance,
935 action 2 in a given table corresponds to the action number which
936 appears in entry [2] of the impAct array for that table.
937 The first entry of all impAct arrays must be 0.
938
939 Action 1: init conditional sequence
940        2: prepend conditional sequence to current sequence
941        3: set ON sequence to new level - 1
942        4: init EN/AN/ON sequence
943        5: fix EN/AN/ON sequence followed by R
944        6: set previous level sequence to level 2
945
946 Notes:
947  1) These tables are used in processPropertySeq(). The input
948     is property sequences as determined by resolveImplicitLevels.
949  2) Most such property sequences are processed immediately
950     (levels are assigned).
951  3) However, some sequences cannot be assigned a final level till
952     one or more following sequences are received. For instance,
953     ON following an R sequence within an even-level paragraph.
954     If the following sequence is R, the ON sequence will be
955     assigned basic run level+1, and so will the R sequence.
956  4) S is generally handled like ON, since its level will be fixed
957     to paragraph level in adjustWSLevels().
958
959*/
960
961static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
962/*  In this table, conditional sequences receive the higher possible level
963    until proven otherwise.
964*/
965{
966/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
967/* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
968/* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
969/* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
970/* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
971/* 4 : R+ON       */ { s(2,0),     1 ,     3 ,     3 ,     4 ,     4 , s(2,0),  1 },
972/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0),     2 ,     5 ,     5 , s(2,0),  1 }
973};
974static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
975/*  In this table, conditional sequences receive the lower possible level
976    until proven otherwise.
977*/
978{
979/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
980/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
981/* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
982/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
983/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
984/* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
985/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
986};
987static const ImpAct impAct0 = {0,1,2,3,4,5,6};
988static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
989                                           &impTabR_DEFAULT},
990                                          {&impAct0, &impAct0}};
991
992static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
993/*  In this table, conditional sequences receive the higher possible level
994    until proven otherwise.
995*/
996{
997/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
998/* 0 : init       */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  0 },
999/* 1 : L+EN/AN    */ {     0 ,     2 ,    1 ,      1 ,     0 ,     0 ,     0 ,  2 },
1000/* 2 : R          */ {     0 ,     2 ,    4 ,      4 , s(1,3),     0 ,     0 ,  1 },
1001/* 3 : R+ON       */ { s(2,0),     2 ,    4 ,      4 ,     3 ,     3 , s(2,0),  1 },
1002/* 4 : R+EN/AN    */ {     0 ,     2 ,    4 ,      4 , s(1,3), s(1,3),     0 ,  2 }
1003  };
1004static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1005                                                   &impTabR_DEFAULT},
1006                                                  {&impAct0, &impAct0}};
1007
1008static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1009/*  In this table, EN/AN+ON sequences receive levels as if associated with R
1010    until proven that there is L or sor/eor on both sides. AN is handled like EN.
1011*/
1012{
1013/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1014/* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
1015/* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
1016/* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
1017/* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
1018/* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
1019/* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
1020};
1021static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1022/*  In this table, EN/AN+ON sequences receive levels as if associated with R
1023    until proven that there is L on both sides. AN is handled like EN.
1024*/
1025{
1026/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1027/* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1028/* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1029/* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
1030/* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
1031/* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
1032};
1033static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1034                        {&impTabL_GROUP_NUMBERS_WITH_R,
1035                         &impTabR_GROUP_NUMBERS_WITH_R},
1036                        {&impAct0, &impAct0}};
1037
1038
1039static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1040/*  This table is identical to the Default LTR table except that EN and AN are
1041    handled like L.
1042*/
1043{
1044/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1045/* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
1046/* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
1047/* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
1048/* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
1049/* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
1050/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
1051};
1052static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1053/*  This table is identical to the Default RTL table except that EN and AN are
1054    handled like L.
1055*/
1056{
1057/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1058/* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1059/* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
1060/* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
1061/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
1062/* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
1063/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
1064};
1065static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1066                        {&impTabL_INVERSE_NUMBERS_AS_L,
1067                         &impTabR_INVERSE_NUMBERS_AS_L},
1068                        {&impAct0, &impAct0}};
1069
1070static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
1071/*  In this table, conditional sequences receive the lower possible level
1072    until proven otherwise.
1073*/
1074{
1075/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1076/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
1077/* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
1078/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
1079/* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
1080/* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
1081/* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
1082/* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
1083};
1084static const ImpAct impAct1 = {0,1,11,12};
1085/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1086 */
1087static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1088                        {&impTabL_DEFAULT,
1089                         &impTabR_INVERSE_LIKE_DIRECT},
1090                        {&impAct0, &impAct1}};
1091
1092static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1093/*  The case handled in this table is (visually):  R EN L
1094*/
1095{
1096/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1097/* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
1098/* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
1099/* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
1100/* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
1101/* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
1102/* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
1103/* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
1104};
1105static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1106/*  The cases handled in this table are (visually):  R EN L
1107                                                     R L AN L
1108*/
1109{
1110/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1111/* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1112/* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
1113/* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
1114/* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
1115/* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
1116/* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
1117/* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
1118};
1119static const ImpAct impAct2 = {0,1,7,8,9,10};
1120static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1121                        {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1122                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1123                        {&impAct0, &impAct2}};
1124
1125static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1126                        {&impTabL_NUMBERS_SPECIAL,
1127                         &impTabR_INVERSE_LIKE_DIRECT},
1128                        {&impAct0, &impAct1}};
1129
1130static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1131/*  The case handled in this table is (visually):  R EN L
1132*/
1133{
1134/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
1135/* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
1136/* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
1137/* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
1138/* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
1139/* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
1140};
1141static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1142                        {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1143                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1144                        {&impAct0, &impAct2}};
1145
1146#undef s
1147
1148typedef struct {
1149    const ImpTab * pImpTab;             /* level table pointer          */
1150    const ImpAct * pImpAct;             /* action map array             */
1151    int32_t startON;                    /* start of ON sequence         */
1152    int32_t startL2EN;                  /* start of level 2 sequence    */
1153    int32_t lastStrongRTL;              /* index of last found R or AL  */
1154    int32_t state;                      /* current state                */
1155    UBiDiLevel runLevel;                /* run level before implicit solving */
1156} LevState;
1157
1158/*------------------------------------------------------------------------*/
1159
1160static void
1161addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1162  /* param pos:     position where to insert
1163     param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1164  */
1165{
1166#define FIRSTALLOC  10
1167    Point point;
1168    InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1169
1170    if (pInsertPoints->capacity == 0)
1171    {
1172        pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
1173        if (pInsertPoints->points == NULL)
1174        {
1175            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1176            return;
1177        }
1178        pInsertPoints->capacity=FIRSTALLOC;
1179    }
1180    if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1181    {
1182        void * savePoints=pInsertPoints->points;
1183        pInsertPoints->points=uprv_realloc(pInsertPoints->points,
1184                                           pInsertPoints->capacity*2*sizeof(Point));
1185        if (pInsertPoints->points == NULL)
1186        {
1187            pInsertPoints->points=savePoints;
1188            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1189            return;
1190        }
1191        else  pInsertPoints->capacity*=2;
1192    }
1193    point.pos=pos;
1194    point.flag=flag;
1195    pInsertPoints->points[pInsertPoints->size]=point;
1196    pInsertPoints->size++;
1197#undef FIRSTALLOC
1198}
1199
1200/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1201
1202/*
1203 * This implementation of the (Wn) rules applies all rules in one pass.
1204 * In order to do so, it needs a look-ahead of typically 1 character
1205 * (except for W5: sequences of ET) and keeps track of changes
1206 * in a rule Wp that affect a later Wq (p<q).
1207 *
1208 * The (Nn) and (In) rules are also performed in that same single loop,
1209 * but effectively one iteration behind for white space.
1210 *
1211 * Since all implicit rules are performed in one step, it is not necessary
1212 * to actually store the intermediate directional properties in dirProps[].
1213 */
1214
1215static void
1216processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1217                   int32_t start, int32_t limit) {
1218    uint8_t cell, oldStateSeq, actionSeq;
1219    const ImpTab * pImpTab=pLevState->pImpTab;
1220    const ImpAct * pImpAct=pLevState->pImpAct;
1221    UBiDiLevel * levels=pBiDi->levels;
1222    UBiDiLevel level, addLevel;
1223    InsertPoints * pInsertPoints;
1224    int32_t start0, k;
1225
1226    start0=start;                           /* save original start position */
1227    oldStateSeq=(uint8_t)pLevState->state;
1228    cell=(*pImpTab)[oldStateSeq][_prop];
1229    pLevState->state=GET_STATE(cell);       /* isolate the new state */
1230    actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1231    addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1232
1233    if(actionSeq) {
1234        switch(actionSeq) {
1235        case 1:                         /* init ON seq */
1236            pLevState->startON=start0;
1237            break;
1238
1239        case 2:                         /* prepend ON seq to current seq */
1240            start=pLevState->startON;
1241            break;
1242
1243        case 3:                         /* L or S after possible relevant EN/AN */
1244            /* check if we had EN after R/AL */
1245            if (pLevState->startL2EN >= 0) {
1246                addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1247            }
1248            pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
1249            /* check if we had any relevant EN/AN after R/AL */
1250            pInsertPoints=&(pBiDi->insertPoints);
1251            if ((pInsertPoints->capacity == 0) ||
1252                (pInsertPoints->size <= pInsertPoints->confirmed))
1253            {
1254                /* nothing, just clean up */
1255                pLevState->lastStrongRTL=-1;
1256                /* check if we have a pending conditional segment */
1257                level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1258                if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
1259                    start=pLevState->startON;   /* reset to basic run level */
1260                }
1261                if (_prop == DirProp_S)                /* add LRM before S */
1262                {
1263                    addPoint(pBiDi, start0, LRM_BEFORE);
1264                    pInsertPoints->confirmed=pInsertPoints->size;
1265                }
1266                break;
1267            }
1268            /* reset previous RTL cont to level for LTR text */
1269            for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1270            {
1271                /* reset odd level, leave runLevel+2 as is */
1272                levels[k]=(levels[k] - 2) & ~1;
1273            }
1274            /* mark insert points as confirmed */
1275            pInsertPoints->confirmed=pInsertPoints->size;
1276            pLevState->lastStrongRTL=-1;
1277            if (_prop == DirProp_S)            /* add LRM before S */
1278            {
1279                addPoint(pBiDi, start0, LRM_BEFORE);
1280                pInsertPoints->confirmed=pInsertPoints->size;
1281            }
1282            break;
1283
1284        case 4:                         /* R/AL after possible relevant EN/AN */
1285            /* just clean up */
1286            pInsertPoints=&(pBiDi->insertPoints);
1287            if (pInsertPoints->capacity > 0)
1288                /* remove all non confirmed insert points */
1289                pInsertPoints->size=pInsertPoints->confirmed;
1290            pLevState->startON=-1;
1291            pLevState->startL2EN=-1;
1292            pLevState->lastStrongRTL=limit - 1;
1293            break;
1294
1295        case 5:                         /* EN/AN after R/AL + possible cont */
1296            /* check for real AN */
1297            if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) &&
1298                (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1299            {
1300                /* real AN */
1301                if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1302                {
1303                    /* just note the righmost digit as a strong RTL */
1304                    pLevState->lastStrongRTL=limit - 1;
1305                    break;
1306                }
1307                if (pLevState->startL2EN >= 0)  /* after EN, no AN */
1308                {
1309                    addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1310                    pLevState->startL2EN=-2;
1311                }
1312                /* note AN */
1313                addPoint(pBiDi, start0, LRM_BEFORE);
1314                break;
1315            }
1316            /* if first EN/AN after R/AL */
1317            if (pLevState->startL2EN == -1) {
1318                pLevState->startL2EN=start0;
1319            }
1320            break;
1321
1322        case 6:                         /* note location of latest R/AL */
1323            pLevState->lastStrongRTL=limit - 1;
1324            pLevState->startON=-1;
1325            break;
1326
1327        case 7:                         /* L after R+ON/EN/AN */
1328            /* include possible adjacent number on the left */
1329            for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1330            if(k>=0) {
1331                addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
1332                pInsertPoints=&(pBiDi->insertPoints);
1333                pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
1334            }
1335            pLevState->startON=start0;
1336            break;
1337
1338        case 8:                         /* AN after L */
1339            /* AN numbers between L text on both sides may be trouble. */
1340            /* tentatively bracket with LRMs; will be confirmed if followed by L */
1341            addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
1342            addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
1343            break;
1344
1345        case 9:                         /* R after L+ON/EN/AN */
1346            /* false alert, infirm LRMs around previous AN */
1347            pInsertPoints=&(pBiDi->insertPoints);
1348            pInsertPoints->size=pInsertPoints->confirmed;
1349            if (_prop == DirProp_S)            /* add RLM before S */
1350            {
1351                addPoint(pBiDi, start0, RLM_BEFORE);
1352                pInsertPoints->confirmed=pInsertPoints->size;
1353            }
1354            break;
1355
1356        case 10:                        /* L after L+ON/AN */
1357            level=pLevState->runLevel + addLevel;
1358            for(k=pLevState->startON; k<start0; k++) {
1359                if (levels[k]<level)
1360                    levels[k]=level;
1361            }
1362            pInsertPoints=&(pBiDi->insertPoints);
1363            pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
1364            pLevState->startON=start0;
1365            break;
1366
1367        case 11:                        /* L after L+ON+EN/AN/ON */
1368            level=pLevState->runLevel;
1369            for(k=start0-1; k>=pLevState->startON; k--) {
1370                if(levels[k]==level+3) {
1371                    while(levels[k]==level+3) {
1372                        levels[k--]-=2;
1373                    }
1374                    while(levels[k]==level) {
1375                        k--;
1376                    }
1377                }
1378                if(levels[k]==level+2) {
1379                    levels[k]=level;
1380                    continue;
1381                }
1382                levels[k]=level+1;
1383            }
1384            break;
1385
1386        case 12:                        /* R after L+ON+EN/AN/ON */
1387            level=pLevState->runLevel+1;
1388            for(k=start0-1; k>=pLevState->startON; k--) {
1389                if(levels[k]>level) {
1390                    levels[k]-=2;
1391                }
1392            }
1393            break;
1394
1395        default:                        /* we should never get here */
1396            U_ASSERT(FALSE);
1397            break;
1398        }
1399    }
1400    if((addLevel) || (start < start0)) {
1401        level=pLevState->runLevel + addLevel;
1402        for(k=start; k<limit; k++) {
1403            levels[k]=level;
1404        }
1405    }
1406}
1407
1408static void
1409resolveImplicitLevels(UBiDi *pBiDi,
1410                      int32_t start, int32_t limit,
1411                      DirProp sor, DirProp eor) {
1412    const DirProp *dirProps=pBiDi->dirProps;
1413
1414    LevState levState;
1415    int32_t i, start1, start2;
1416    uint8_t oldStateImp, stateImp, actionImp;
1417    uint8_t gprop, resProp, cell;
1418    UBool inverseRTL;
1419    DirProp nextStrongProp=R;
1420    int32_t nextStrongPos=-1;
1421
1422    levState.startON = -1;  /* silence gcc flow analysis */
1423
1424    /* check for RTL inverse BiDi mode */
1425    /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
1426     * loop on the text characters from end to start.
1427     * This would need a different properties state table (at least different
1428     * actions) and different levels state tables (maybe very similar to the
1429     * LTR corresponding ones.
1430     */
1431    inverseRTL=(UBool)
1432        ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
1433         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
1434          pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
1435    /* initialize for levels state table */
1436    levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1437    levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1438    levState.state=0;
1439    levState.runLevel=pBiDi->levels[start];
1440    levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
1441    levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
1442    processPropertySeq(pBiDi, &levState, sor, start, start);
1443    /* initialize for property state table */
1444    if(NO_CONTEXT_RTL(dirProps[start])==NSM) {
1445        stateImp = 1 + sor;
1446    } else {
1447        stateImp=0;
1448    }
1449    start1=start;
1450    start2=start;
1451
1452    for(i=start; i<=limit; i++) {
1453        if(i>=limit) {
1454            gprop=eor;
1455        } else {
1456            DirProp prop, prop1;
1457            prop=NO_CONTEXT_RTL(dirProps[i]);
1458            if(inverseRTL) {
1459                if(prop==AL) {
1460                    /* AL before EN does not make it AN */
1461                    prop=R;
1462                } else if(prop==EN) {
1463                    if(nextStrongPos<=i) {
1464                        /* look for next strong char (L/R/AL) */
1465                        int32_t j;
1466                        nextStrongProp=R;   /* set default */
1467                        nextStrongPos=limit;
1468                        for(j=i+1; j<limit; j++) {
1469                            prop1=NO_CONTEXT_RTL(dirProps[j]);
1470                            if(prop1==L || prop1==R || prop1==AL) {
1471                                nextStrongProp=prop1;
1472                                nextStrongPos=j;
1473                                break;
1474                            }
1475                        }
1476                    }
1477                    if(nextStrongProp==AL) {
1478                        prop=AN;
1479                    }
1480                }
1481            }
1482            gprop=groupProp[prop];
1483        }
1484        oldStateImp=stateImp;
1485        cell=impTabProps[oldStateImp][gprop];
1486        stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
1487        actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
1488        if((i==limit) && (actionImp==0)) {
1489            /* there is an unprocessed sequence if its property == eor   */
1490            actionImp=1;                    /* process the last sequence */
1491        }
1492        if(actionImp) {
1493            resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
1494            switch(actionImp) {
1495            case 1:             /* process current seq1, init new seq1 */
1496                processPropertySeq(pBiDi, &levState, resProp, start1, i);
1497                start1=i;
1498                break;
1499            case 2:             /* init new seq2 */
1500                start2=i;
1501                break;
1502            case 3:             /* process seq1, process seq2, init new seq1 */
1503                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
1504                processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
1505                start1=i;
1506                break;
1507            case 4:             /* process seq1, set seq1=seq2, init new seq2 */
1508                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
1509                start1=start2;
1510                start2=i;
1511                break;
1512            default:            /* we should never get here */
1513                U_ASSERT(FALSE);
1514                break;
1515            }
1516        }
1517    }
1518    /* flush possible pending sequence, e.g. ON */
1519    processPropertySeq(pBiDi, &levState, eor, limit, limit);
1520}
1521
1522/* perform (L1) and (X9) ---------------------------------------------------- */
1523
1524/*
1525 * Reset the embedding levels for some non-graphic characters (L1).
1526 * This function also sets appropriate levels for BN, and
1527 * explicit embedding types that are supposed to have been removed
1528 * from the paragraph in (X9).
1529 */
1530static void
1531adjustWSLevels(UBiDi *pBiDi) {
1532    const DirProp *dirProps=pBiDi->dirProps;
1533    UBiDiLevel *levels=pBiDi->levels;
1534    int32_t i;
1535
1536    if(pBiDi->flags&MASK_WS) {
1537        UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
1538        Flags flag;
1539
1540        i=pBiDi->trailingWSStart;
1541        while(i>0) {
1542            /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1543            while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) {
1544                if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1545                    levels[i]=0;
1546                } else {
1547                    levels[i]=GET_PARALEVEL(pBiDi, i);
1548                }
1549            }
1550
1551            /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1552            /* here, i+1 is guaranteed to be <length */
1553            while(i>0) {
1554                flag=DIRPROP_FLAG_NC(dirProps[--i]);
1555                if(flag&MASK_BN_EXPLICIT) {
1556                    levels[i]=levels[i+1];
1557                } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1558                    levels[i]=0;
1559                    break;
1560                } else if(flag&MASK_B_S) {
1561                    levels[i]=GET_PARALEVEL(pBiDi, i);
1562                    break;
1563                }
1564            }
1565        }
1566    }
1567}
1568
1569#define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
1570#define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
1571static void
1572setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
1573                UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
1574    void *runsOnlyMemory;
1575    int32_t *visualMap;
1576    UChar *visualText;
1577    int32_t saveLength, saveTrailingWSStart;
1578    const UBiDiLevel *levels;
1579    UBiDiLevel *saveLevels;
1580    UBiDiDirection saveDirection;
1581    UBool saveMayAllocateText;
1582    Run *runs;
1583    int32_t visualLength, i, j, visualStart, logicalStart,
1584            runCount, runLength, addedRuns, insertRemove,
1585            start, limit, step, indexOddBit, logicalPos,
1586            index0, index1;
1587    uint32_t saveOptions;
1588
1589    pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
1590    if(length==0) {
1591        ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
1592        goto cleanup3;
1593    }
1594    /* obtain memory for mapping table and visual text */
1595    runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
1596    if(runsOnlyMemory==NULL) {
1597        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1598        goto cleanup3;
1599    }
1600    visualMap=runsOnlyMemory;
1601    visualText=(UChar *)&visualMap[length];
1602    saveLevels=(UBiDiLevel *)&visualText[length];
1603    saveOptions=pBiDi->reorderingOptions;
1604    if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
1605        pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
1606        pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
1607    }
1608    paraLevel&=1;                       /* accept only 0 or 1 */
1609    ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
1610    if(U_FAILURE(*pErrorCode)) {
1611        goto cleanup3;
1612    }
1613    /* we cannot access directly pBiDi->levels since it is not yet set if
1614     * direction is not MIXED
1615     */
1616    levels=ubidi_getLevels(pBiDi, pErrorCode);
1617    uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
1618    saveTrailingWSStart=pBiDi->trailingWSStart;
1619    saveLength=pBiDi->length;
1620    saveDirection=pBiDi->direction;
1621
1622    /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
1623     * the visual map and the dirProps array to drive the second call
1624     * to ubidi_setPara (but must make provision for possible removal of
1625     * BiDi controls.  Alternatively, only use the dirProps array via
1626     * customized classifier callback.
1627     */
1628    visualLength=ubidi_writeReordered(pBiDi, visualText, length,
1629                                      UBIDI_DO_MIRRORING, pErrorCode);
1630    ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
1631    if(U_FAILURE(*pErrorCode)) {
1632        goto cleanup2;
1633    }
1634    pBiDi->reorderingOptions=saveOptions;
1635
1636    pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
1637    paraLevel^=1;
1638    /* Because what we did with reorderingOptions, visualText may be shorter
1639     * than the original text. But we don't want the levels memory to be
1640     * reallocated shorter than the original length, since we need to restore
1641     * the levels as after the first call to ubidi_setpara() before returning.
1642     * We will force mayAllocateText to FALSE before the second call to
1643     * ubidi_setpara(), and will restore it afterwards.
1644     */
1645    saveMayAllocateText=pBiDi->mayAllocateText;
1646    pBiDi->mayAllocateText=FALSE;
1647    ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
1648    pBiDi->mayAllocateText=saveMayAllocateText;
1649    ubidi_getRuns(pBiDi, pErrorCode);
1650    if(U_FAILURE(*pErrorCode)) {
1651        goto cleanup1;
1652    }
1653    /* check if some runs must be split, count how many splits */
1654    addedRuns=0;
1655    runCount=pBiDi->runCount;
1656    runs=pBiDi->runs;
1657    visualStart=0;
1658    for(i=0; i<runCount; i++, visualStart+=runLength) {
1659        runLength=runs[i].visualLimit-visualStart;
1660        if(runLength<2) {
1661            continue;
1662        }
1663        logicalStart=GET_INDEX(runs[i].logicalStart);
1664        for(j=logicalStart+1; j<logicalStart+runLength; j++) {
1665            index0=visualMap[j];
1666            index1=visualMap[j-1];
1667            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
1668                addedRuns++;
1669            }
1670        }
1671    }
1672    if(addedRuns) {
1673        if(getRunsMemory(pBiDi, runCount+addedRuns)) {
1674            if(runCount==1) {
1675                /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
1676                pBiDi->runsMemory[0]=runs[0];
1677            }
1678            runs=pBiDi->runs=pBiDi->runsMemory;
1679            pBiDi->runCount+=addedRuns;
1680        } else {
1681            goto cleanup1;
1682        }
1683    }
1684    /* split runs which are not consecutive in source text */
1685    for(i=runCount-1; i>=0; i--) {
1686        runLength= i==0 ? runs[0].visualLimit :
1687                          runs[i].visualLimit-runs[i-1].visualLimit;
1688        logicalStart=runs[i].logicalStart;
1689        indexOddBit=GET_ODD_BIT(logicalStart);
1690        logicalStart=GET_INDEX(logicalStart);
1691        if(runLength<2) {
1692            if(addedRuns) {
1693                runs[i+addedRuns]=runs[i];
1694            }
1695            logicalPos=visualMap[logicalStart];
1696            runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1697                                            saveLevels[logicalPos]^indexOddBit);
1698            continue;
1699        }
1700        if(indexOddBit) {
1701            start=logicalStart;
1702            limit=logicalStart+runLength-1;
1703            step=1;
1704        } else {
1705            start=logicalStart+runLength-1;
1706            limit=logicalStart;
1707            step=-1;
1708        }
1709        for(j=start; j!=limit; j+=step) {
1710            index0=visualMap[j];
1711            index1=visualMap[j+step];
1712            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
1713                logicalPos=BIDI_MIN(visualMap[start], index0);
1714                runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1715                                            saveLevels[logicalPos]^indexOddBit);
1716                runs[i+addedRuns].visualLimit=runs[i].visualLimit;
1717                runs[i].visualLimit-=BIDI_ABS(j-start)+1;
1718                insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
1719                runs[i+addedRuns].insertRemove=insertRemove;
1720                runs[i].insertRemove&=~insertRemove;
1721                start=j+step;
1722                addedRuns--;
1723            }
1724        }
1725        if(addedRuns) {
1726            runs[i+addedRuns]=runs[i];
1727        }
1728        logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
1729        runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1730                                            saveLevels[logicalPos]^indexOddBit);
1731    }
1732
1733  cleanup1:
1734    /* restore initial paraLevel */
1735    pBiDi->paraLevel^=1;
1736  cleanup2:
1737    /* restore real text */
1738    pBiDi->text=text;
1739    pBiDi->length=saveLength;
1740    pBiDi->originalLength=length;
1741    pBiDi->direction=saveDirection;
1742    /* the saved levels should never excess levelsSize, but we check anyway */
1743    if(saveLength>pBiDi->levelsSize) {
1744        saveLength=pBiDi->levelsSize;
1745    }
1746    uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
1747    pBiDi->trailingWSStart=saveTrailingWSStart;
1748    /* free memory for mapping table and visual text */
1749    uprv_free(runsOnlyMemory);
1750    if(pBiDi->runCount>1) {
1751        pBiDi->direction=UBIDI_MIXED;
1752    }
1753  cleanup3:
1754    pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
1755}
1756
1757/* ubidi_setPara ------------------------------------------------------------ */
1758
1759U_CAPI void U_EXPORT2
1760ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
1761              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
1762              UErrorCode *pErrorCode) {
1763    UBiDiDirection direction;
1764
1765    /* check the argument values */
1766    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
1767    if(pBiDi==NULL || text==NULL || length<-1 ||
1768       (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
1769        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1770        return;
1771    }
1772
1773    if(length==-1) {
1774        length=u_strlen(text);
1775    }
1776
1777    /* special treatment for RUNS_ONLY mode */
1778    if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
1779        setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
1780        return;
1781    }
1782
1783    /* initialize the UBiDi structure */
1784    pBiDi->pParaBiDi=NULL;          /* mark unfinished setPara */
1785    pBiDi->text=text;
1786    pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
1787    pBiDi->paraLevel=paraLevel;
1788    pBiDi->direction=UBIDI_LTR;
1789    pBiDi->paraCount=1;
1790
1791    pBiDi->dirProps=NULL;
1792    pBiDi->levels=NULL;
1793    pBiDi->runs=NULL;
1794    pBiDi->insertPoints.size=0;         /* clean up from last call */
1795    pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
1796
1797    /*
1798     * Save the original paraLevel if contextual; otherwise, set to 0.
1799     */
1800    if(IS_DEFAULT_LEVEL(paraLevel)) {
1801        pBiDi->defaultParaLevel=paraLevel;
1802    } else {
1803        pBiDi->defaultParaLevel=0;
1804    }
1805
1806    if(length==0) {
1807        /*
1808         * For an empty paragraph, create a UBiDi object with the paraLevel and
1809         * the flags and the direction set but without allocating zero-length arrays.
1810         * There is nothing more to do.
1811         */
1812        if(IS_DEFAULT_LEVEL(paraLevel)) {
1813            pBiDi->paraLevel&=1;
1814            pBiDi->defaultParaLevel=0;
1815        }
1816        if(paraLevel&1) {
1817            pBiDi->flags=DIRPROP_FLAG(R);
1818            pBiDi->direction=UBIDI_RTL;
1819        } else {
1820            pBiDi->flags=DIRPROP_FLAG(L);
1821            pBiDi->direction=UBIDI_LTR;
1822        }
1823
1824        pBiDi->runCount=0;
1825        pBiDi->paraCount=0;
1826        pBiDi->pParaBiDi=pBiDi;         /* mark successful setPara */
1827        return;
1828    }
1829
1830    pBiDi->runCount=-1;
1831
1832    /*
1833     * Get the directional properties,
1834     * the flags bit-set, and
1835     * determine the paragraph level if necessary.
1836     */
1837    if(getDirPropsMemory(pBiDi, length)) {
1838        pBiDi->dirProps=pBiDi->dirPropsMemory;
1839        getDirProps(pBiDi);
1840    } else {
1841        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1842        return;
1843    }
1844    /* the processed length may have changed if UBIDI_OPTION_STREAMING */
1845    length= pBiDi->length;
1846    pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
1847    /* allocate paras memory */
1848    if(pBiDi->paraCount>1) {
1849        if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) {
1850            pBiDi->paras=pBiDi->parasMemory;
1851            pBiDi->paras[pBiDi->paraCount-1]=length;
1852        } else {
1853            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1854            return;
1855        }
1856    } else {
1857        /* initialize paras for single paragraph */
1858        pBiDi->paras=pBiDi->simpleParas;
1859        pBiDi->simpleParas[0]=length;
1860    }
1861
1862    /* are explicit levels specified? */
1863    if(embeddingLevels==NULL) {
1864        /* no: determine explicit levels according to the (Xn) rules */\
1865        if(getLevelsMemory(pBiDi, length)) {
1866            pBiDi->levels=pBiDi->levelsMemory;
1867            direction=resolveExplicitLevels(pBiDi);
1868        } else {
1869            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1870            return;
1871        }
1872    } else {
1873        /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
1874        pBiDi->levels=embeddingLevels;
1875        direction=checkExplicitLevels(pBiDi, pErrorCode);
1876        if(U_FAILURE(*pErrorCode)) {
1877            return;
1878        }
1879    }
1880
1881    /*
1882     * The steps after (X9) in the UBiDi algorithm are performed only if
1883     * the paragraph text has mixed directionality!
1884     */
1885    pBiDi->direction=direction;
1886    switch(direction) {
1887    case UBIDI_LTR:
1888        /* make sure paraLevel is even */
1889        pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
1890
1891        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1892        pBiDi->trailingWSStart=0;
1893        break;
1894    case UBIDI_RTL:
1895        /* make sure paraLevel is odd */
1896        pBiDi->paraLevel|=1;
1897
1898        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1899        pBiDi->trailingWSStart=0;
1900        break;
1901    default:
1902        /*
1903         *  Choose the right implicit state table
1904         */
1905        switch(pBiDi->reorderingMode) {
1906        case UBIDI_REORDER_DEFAULT:
1907            pBiDi->pImpTabPair=&impTab_DEFAULT;
1908            break;
1909        case UBIDI_REORDER_NUMBERS_SPECIAL:
1910            pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
1911            break;
1912        case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
1913            pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
1914            break;
1915        case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
1916            pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
1917            break;
1918        case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
1919            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
1920                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
1921            } else {
1922                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
1923            }
1924            break;
1925        case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
1926            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
1927                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
1928            } else {
1929                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
1930            }
1931            break;
1932        default:
1933            /* we should never get here */
1934            U_ASSERT(FALSE);
1935            break;
1936        }
1937        /*
1938         * If there are no external levels specified and there
1939         * are no significant explicit level codes in the text,
1940         * then we can treat the entire paragraph as one run.
1941         * Otherwise, we need to perform the following rules on runs of
1942         * the text with the same embedding levels. (X10)
1943         * "Significant" explicit level codes are ones that actually
1944         * affect non-BN characters.
1945         * Examples for "insignificant" ones are empty embeddings
1946         * LRE-PDF, LRE-RLE-PDF-PDF, etc.
1947         */
1948        if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
1949                                   !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
1950            resolveImplicitLevels(pBiDi, 0, length,
1951                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
1952                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
1953        } else {
1954            /* sor, eor: start and end types of same-level-run */
1955            UBiDiLevel *levels=pBiDi->levels;
1956            int32_t start, limit=0;
1957            UBiDiLevel level, nextLevel;
1958            DirProp sor, eor;
1959
1960            /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
1961            level=GET_PARALEVEL(pBiDi, 0);
1962            nextLevel=levels[0];
1963            if(level<nextLevel) {
1964                eor=GET_LR_FROM_LEVEL(nextLevel);
1965            } else {
1966                eor=GET_LR_FROM_LEVEL(level);
1967            }
1968
1969            do {
1970                /* determine start and limit of the run (end points just behind the run) */
1971
1972                /* the values for this run's start are the same as for the previous run's end */
1973                start=limit;
1974                level=nextLevel;
1975                if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) {
1976                    /* except if this is a new paragraph, then set sor = para level */
1977                    sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
1978                } else {
1979                    sor=eor;
1980                }
1981
1982                /* search for the limit of this run */
1983                while(++limit<length && levels[limit]==level) {}
1984
1985                /* get the correct level of the next run */
1986                if(limit<length) {
1987                    nextLevel=levels[limit];
1988                } else {
1989                    nextLevel=GET_PARALEVEL(pBiDi, length-1);
1990                }
1991
1992                /* determine eor from max(level, nextLevel); sor is last run's eor */
1993                if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
1994                    eor=GET_LR_FROM_LEVEL(nextLevel);
1995                } else {
1996                    eor=GET_LR_FROM_LEVEL(level);
1997                }
1998
1999                /* if the run consists of overridden directional types, then there
2000                   are no implicit types to be resolved */
2001                if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2002                    resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2003                } else {
2004                    /* remove the UBIDI_LEVEL_OVERRIDE flags */
2005                    do {
2006                        levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2007                    } while(start<limit);
2008                }
2009            } while(limit<length);
2010        }
2011        /* check if we got any memory shortage while adding insert points */
2012        if (U_FAILURE(pBiDi->insertPoints.errorCode))
2013        {
2014            *pErrorCode=pBiDi->insertPoints.errorCode;
2015            return;
2016        }
2017        /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2018        adjustWSLevels(pBiDi);
2019        break;
2020    }
2021    /* add RLM for inverse Bidi with contextual orientation resolving
2022     * to RTL which would not round-trip otherwise
2023     */
2024    if((pBiDi->defaultParaLevel>0) &&
2025       (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2026       ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2027        (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2028        int32_t i, j, start, last;
2029        DirProp dirProp;
2030        for(i=0; i<pBiDi->paraCount; i++) {
2031            last=pBiDi->paras[i]-1;
2032            if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) {
2033                continue;           /* LTR paragraph */
2034            }
2035            start= i==0 ? 0 : pBiDi->paras[i - 1];
2036            for(j=last; j>=start; j--) {
2037                dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]);
2038                if(dirProp==L) {
2039                    if(j<last) {
2040                        while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) {
2041                            last--;
2042                        }
2043                    }
2044                    addPoint(pBiDi, last, RLM_BEFORE);
2045                    break;
2046                }
2047                if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2048                    break;
2049                }
2050            }
2051        }
2052    }
2053
2054    if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2055        pBiDi->resultLength -= pBiDi->controlCount;
2056    } else {
2057        pBiDi->resultLength += pBiDi->insertPoints.size;
2058    }
2059    pBiDi->pParaBiDi=pBiDi;             /* mark successful setPara */
2060}
2061
2062U_CAPI void U_EXPORT2
2063ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2064    if(pBiDi!=NULL) {
2065        pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2066    }
2067}
2068
2069U_CAPI UBool U_EXPORT2
2070ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2071    if(pBiDi!=NULL) {
2072        return pBiDi->orderParagraphsLTR;
2073    } else {
2074        return FALSE;
2075    }
2076}
2077
2078U_CAPI UBiDiDirection U_EXPORT2
2079ubidi_getDirection(const UBiDi *pBiDi) {
2080    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2081        return pBiDi->direction;
2082    } else {
2083        return UBIDI_LTR;
2084    }
2085}
2086
2087U_CAPI const UChar * U_EXPORT2
2088ubidi_getText(const UBiDi *pBiDi) {
2089    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2090        return pBiDi->text;
2091    } else {
2092        return NULL;
2093    }
2094}
2095
2096U_CAPI int32_t U_EXPORT2
2097ubidi_getLength(const UBiDi *pBiDi) {
2098    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2099        return pBiDi->originalLength;
2100    } else {
2101        return 0;
2102    }
2103}
2104
2105U_CAPI int32_t U_EXPORT2
2106ubidi_getProcessedLength(const UBiDi *pBiDi) {
2107    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2108        return pBiDi->length;
2109    } else {
2110        return 0;
2111    }
2112}
2113
2114U_CAPI int32_t U_EXPORT2
2115ubidi_getResultLength(const UBiDi *pBiDi) {
2116    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2117        return pBiDi->resultLength;
2118    } else {
2119        return 0;
2120    }
2121}
2122
2123/* paragraphs API functions ------------------------------------------------- */
2124
2125U_CAPI UBiDiLevel U_EXPORT2
2126ubidi_getParaLevel(const UBiDi *pBiDi) {
2127    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2128        return pBiDi->paraLevel;
2129    } else {
2130        return 0;
2131    }
2132}
2133
2134U_CAPI int32_t U_EXPORT2
2135ubidi_countParagraphs(UBiDi *pBiDi) {
2136    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2137        return 0;
2138    } else {
2139        return pBiDi->paraCount;
2140    }
2141}
2142
2143U_CAPI void U_EXPORT2
2144ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2145                          int32_t *pParaStart, int32_t *pParaLimit,
2146                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2147    int32_t paraStart;
2148
2149    /* check the argument values */
2150    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2151    RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2152    RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2153
2154    pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2155    if(paraIndex) {
2156        paraStart=pBiDi->paras[paraIndex-1];
2157    } else {
2158        paraStart=0;
2159    }
2160    if(pParaStart!=NULL) {
2161        *pParaStart=paraStart;
2162    }
2163    if(pParaLimit!=NULL) {
2164        *pParaLimit=pBiDi->paras[paraIndex];
2165    }
2166    if(pParaLevel!=NULL) {
2167        *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2168    }
2169}
2170
2171U_CAPI int32_t U_EXPORT2
2172ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2173                          int32_t *pParaStart, int32_t *pParaLimit,
2174                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2175    uint32_t paraIndex;
2176
2177    /* check the argument values */
2178    /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2179    RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2180    RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2181    pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
2182    RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2183
2184    for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++);
2185    ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2186    return paraIndex;
2187}
2188
2189U_CAPI void U_EXPORT2
2190ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2191                       const void *newContext, UBiDiClassCallback **oldFn,
2192                       const void **oldContext, UErrorCode *pErrorCode)
2193{
2194    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2195    if(pBiDi==NULL) {
2196        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2197        return;
2198    }
2199    if( oldFn )
2200    {
2201        *oldFn = pBiDi->fnClassCallback;
2202    }
2203    if( oldContext )
2204    {
2205        *oldContext = pBiDi->coClassCallback;
2206    }
2207    pBiDi->fnClassCallback = newFn;
2208    pBiDi->coClassCallback = newContext;
2209}
2210
2211U_CAPI void U_EXPORT2
2212ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
2213{
2214    if(pBiDi==NULL) {
2215        return;
2216    }
2217    if( fn )
2218    {
2219        *fn = pBiDi->fnClassCallback;
2220    }
2221    if( context )
2222    {
2223        *context = pBiDi->coClassCallback;
2224    }
2225}
2226
2227U_CAPI UCharDirection U_EXPORT2
2228ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
2229{
2230    UCharDirection dir;
2231
2232    if( pBiDi->fnClassCallback == NULL ||
2233        (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
2234    {
2235        return ubidi_getClass(pBiDi->bdp, c);
2236    } else {
2237        return dir;
2238    }
2239}
2240
2241