1/* 2****************************************************************************** 3* 4* Copyright (C) 1999-2013, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7****************************************************************************** 8* file name: ubidi.c 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 1999jul27 14* created by: Markus W. Scherer, updated by Matitiahu Allouche 15* 16*/ 17 18#include "cmemory.h" 19#include "unicode/utypes.h" 20#include "unicode/ustring.h" 21#include "unicode/uchar.h" 22#include "unicode/ubidi.h" 23#include "unicode/utf16.h" 24#include "ubidi_props.h" 25#include "ubidiimp.h" 26#include "uassert.h" 27 28/* 29 * General implementation notes: 30 * 31 * Throughout the implementation, there are comments like (W2) that refer to 32 * rules of the BiDi algorithm in its version 5, in this example to the second 33 * rule of the resolution of weak types. 34 * 35 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) 36 * character according to UTF-16, the second UChar gets the directional property of 37 * the entire character assigned, while the first one gets a BN, a boundary 38 * neutral, type, which is ignored by most of the algorithm according to 39 * rule (X9) and the implementation suggestions of the BiDi algorithm. 40 * 41 * Later, adjustWSLevels() will set the level for each BN to that of the 42 * following character (UChar), which results in surrogate pairs getting the 43 * same level on each of their surrogates. 44 * 45 * In a UTF-8 implementation, the same thing could be done: the last byte of 46 * a multi-byte sequence would get the "real" property, while all previous 47 * bytes of that sequence would get BN. 48 * 49 * It is not possible to assign all those parts of a character the same real 50 * property because this would fail in the resolution of weak types with rules 51 * that look at immediately surrounding types. 52 * 53 * As a related topic, this implementation does not remove Boundary Neutral 54 * types from the input, but ignores them wherever this is relevant. 55 * For example, the loop for the resolution of the weak types reads 56 * types until it finds a non-BN. 57 * Also, explicit embedding codes are neither changed into BN nor removed. 58 * They are only treated the same way real BNs are. 59 * As stated before, adjustWSLevels() takes care of them at the end. 60 * For the purpose of conformance, the levels of all these codes 61 * do not matter. 62 * 63 * Note that this implementation never modifies the dirProps 64 * after the initial setup, except for FSI which is changed to either 65 * LRI or RLI in getDirProps(), and paired brackets which may be changed 66 * to L or R according to N0. 67 * 68 * 69 * In this implementation, the resolution of weak types (Wn), 70 * neutrals (Nn), and the assignment of the resolved level (In) 71 * are all done in one single loop, in resolveImplicitLevels(). 72 * Changes of dirProp values are done on the fly, without writing 73 * them back to the dirProps array. 74 * 75 * 76 * This implementation contains code that allows to bypass steps of the 77 * algorithm that are not needed on the specific paragraph 78 * in order to speed up the most common cases considerably, 79 * like text that is entirely LTR, or RTL text without numbers. 80 * 81 * Most of this is done by setting a bit for each directional property 82 * in a flags variable and later checking for whether there are 83 * any LTR characters or any RTL characters, or both, whether 84 * there are any explicit embedding codes, etc. 85 * 86 * If the (Xn) steps are performed, then the flags are re-evaluated, 87 * because they will then not contain the embedding codes any more 88 * and will be adjusted for override codes, so that subsequently 89 * more bypassing may be possible than what the initial flags suggested. 90 * 91 * If the text is not mixed-directional, then the 92 * algorithm steps for the weak type resolution are not performed, 93 * and all levels are set to the paragraph level. 94 * 95 * If there are no explicit embedding codes, then the (Xn) steps 96 * are not performed. 97 * 98 * If embedding levels are supplied as a parameter, then all 99 * explicit embedding codes are ignored, and the (Xn) steps 100 * are not performed. 101 * 102 * White Space types could get the level of the run they belong to, 103 * and are checked with a test of (flags&MASK_EMBEDDING) to 104 * consider if the paragraph direction should be considered in 105 * the flags variable. 106 * 107 * If there are no White Space types in the paragraph, then 108 * (L1) is not necessary in adjustWSLevels(). 109 */ 110 111/* to avoid some conditional statements, use tiny constant arrays */ 112static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; 113static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; 114static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; 115 116#define DIRPROP_FLAG_LR(level) flagLR[(level)&1] 117#define DIRPROP_FLAG_E(level) flagE[(level)&1] 118#define DIRPROP_FLAG_O(level) flagO[(level)&1] 119 120#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R) 121 122/* UBiDi object management -------------------------------------------------- */ 123 124U_CAPI UBiDi * U_EXPORT2 125ubidi_open(void) 126{ 127 UErrorCode errorCode=U_ZERO_ERROR; 128 return ubidi_openSized(0, 0, &errorCode); 129} 130 131U_CAPI UBiDi * U_EXPORT2 132ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { 133 UBiDi *pBiDi; 134 135 /* check the argument values */ 136 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 137 return NULL; 138 } else if(maxLength<0 || maxRunCount<0) { 139 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 140 return NULL; /* invalid arguments */ 141 } 142 143 /* allocate memory for the object */ 144 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); 145 if(pBiDi==NULL) { 146 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 147 return NULL; 148 } 149 150 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ 151 uprv_memset(pBiDi, 0, sizeof(UBiDi)); 152 153 /* get BiDi properties */ 154 pBiDi->bdp=ubidi_getSingleton(); 155 156 /* allocate memory for arrays as requested */ 157 if(maxLength>0) { 158 if( !getInitialDirPropsMemory(pBiDi, maxLength) || 159 !getInitialLevelsMemory(pBiDi, maxLength) 160 ) { 161 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 162 } 163 } else { 164 pBiDi->mayAllocateText=TRUE; 165 } 166 167 if(maxRunCount>0) { 168 if(maxRunCount==1) { 169 /* use simpleRuns[] */ 170 pBiDi->runsSize=sizeof(Run); 171 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { 172 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 173 } 174 } else { 175 pBiDi->mayAllocateRuns=TRUE; 176 } 177 178 if(U_SUCCESS(*pErrorCode)) { 179 return pBiDi; 180 } else { 181 ubidi_close(pBiDi); 182 return NULL; 183 } 184} 185 186/* 187 * We are allowed to allocate memory if memory==NULL or 188 * mayAllocate==TRUE for each array that we need. 189 * We also try to grow memory as needed if we 190 * allocate it. 191 * 192 * Assume sizeNeeded>0. 193 * If *pMemory!=NULL, then assume *pSize>0. 194 * 195 * ### this realloc() may unnecessarily copy the old data, 196 * which we know we don't need any more; 197 * is this the best way to do this?? 198 */ 199U_CFUNC UBool 200ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { 201 void **pMemory = (void **)bidiMem; 202 /* check for existing memory */ 203 if(*pMemory==NULL) { 204 /* we need to allocate memory */ 205 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { 206 *pSize=sizeNeeded; 207 return TRUE; 208 } else { 209 return FALSE; 210 } 211 } else { 212 if(sizeNeeded<=*pSize) { 213 /* there is already enough memory */ 214 return TRUE; 215 } 216 else if(!mayAllocate) { 217 /* not enough memory, and we must not allocate */ 218 return FALSE; 219 } else { 220 /* we try to grow */ 221 void *memory; 222 /* in most cases, we do not need the copy-old-data part of 223 * realloc, but it is needed when adding runs using getRunsMemory() 224 * in setParaRunsOnly() 225 */ 226 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { 227 *pMemory=memory; 228 *pSize=sizeNeeded; 229 return TRUE; 230 } else { 231 /* we failed to grow */ 232 return FALSE; 233 } 234 } 235 } 236} 237 238U_CAPI void U_EXPORT2 239ubidi_close(UBiDi *pBiDi) { 240 if(pBiDi!=NULL) { 241 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ 242 if(pBiDi->dirPropsMemory!=NULL) { 243 uprv_free(pBiDi->dirPropsMemory); 244 } 245 if(pBiDi->levelsMemory!=NULL) { 246 uprv_free(pBiDi->levelsMemory); 247 } 248 if(pBiDi->openingsMemory!=NULL) { 249 uprv_free(pBiDi->openingsMemory); 250 } 251 if(pBiDi->parasMemory!=NULL) { 252 uprv_free(pBiDi->parasMemory); 253 } 254 if(pBiDi->runsMemory!=NULL) { 255 uprv_free(pBiDi->runsMemory); 256 } 257 if(pBiDi->isolatesMemory!=NULL) { 258 uprv_free(pBiDi->isolatesMemory); 259 } 260 if(pBiDi->insertPoints.points!=NULL) { 261 uprv_free(pBiDi->insertPoints.points); 262 } 263 264 uprv_free(pBiDi); 265 } 266} 267 268/* set to approximate "inverse BiDi" ---------------------------------------- */ 269 270U_CAPI void U_EXPORT2 271ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { 272 if(pBiDi!=NULL) { 273 pBiDi->isInverse=isInverse; 274 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L 275 : UBIDI_REORDER_DEFAULT; 276 } 277} 278 279U_CAPI UBool U_EXPORT2 280ubidi_isInverse(UBiDi *pBiDi) { 281 if(pBiDi!=NULL) { 282 return pBiDi->isInverse; 283 } else { 284 return FALSE; 285 } 286} 287 288/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of 289 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre 290 * concept of RUNS_ONLY which is a double operation. 291 * It could be advantageous to divide this into 3 concepts: 292 * a) Operation: direct / inverse / RUNS_ONLY 293 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R 294 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL 295 * This would allow combinations not possible today like RUNS_ONLY with 296 * NUMBERS_SPECIAL. 297 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and 298 * REMOVE_CONTROLS for the inverse step. 299 * Not all combinations would be supported, and probably not all do make sense. 300 * This would need to document which ones are supported and what are the 301 * fallbacks for unsupported combinations. 302 */ 303U_CAPI void U_EXPORT2 304ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { 305 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) 306 && (reorderingMode < UBIDI_REORDER_COUNT)) { 307 pBiDi->reorderingMode = reorderingMode; 308 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); 309 } 310} 311 312U_CAPI UBiDiReorderingMode U_EXPORT2 313ubidi_getReorderingMode(UBiDi *pBiDi) { 314 if (pBiDi!=NULL) { 315 return pBiDi->reorderingMode; 316 } else { 317 return UBIDI_REORDER_DEFAULT; 318 } 319} 320 321U_CAPI void U_EXPORT2 322ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { 323 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { 324 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; 325 } 326 if (pBiDi!=NULL) { 327 pBiDi->reorderingOptions=reorderingOptions; 328 } 329} 330 331U_CAPI uint32_t U_EXPORT2 332ubidi_getReorderingOptions(UBiDi *pBiDi) { 333 if (pBiDi!=NULL) { 334 return pBiDi->reorderingOptions; 335 } else { 336 return 0; 337 } 338} 339 340U_CAPI UBiDiDirection U_EXPORT2 341ubidi_getBaseDirection(const UChar *text, 342int32_t length){ 343 344 int32_t i; 345 UChar32 uchar; 346 UCharDirection dir; 347 348 if( text==NULL || length<-1 ){ 349 return UBIDI_NEUTRAL; 350 } 351 352 if(length==-1) { 353 length=u_strlen(text); 354 } 355 356 for( i = 0 ; i < length; ) { 357 /* i is incremented by U16_NEXT */ 358 U16_NEXT(text, i, length, uchar); 359 dir = u_charDirection(uchar); 360 if( dir == U_LEFT_TO_RIGHT ) 361 return UBIDI_LTR; 362 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC ) 363 return UBIDI_RTL; 364 } 365 return UBIDI_NEUTRAL; 366} 367 368/* perform (P2)..(P3) ------------------------------------------------------- */ 369 370/** 371 * Returns the directionality of the first strong character 372 * after the last B in prologue, if any. 373 * Requires prologue!=null. 374 */ 375static DirProp 376firstL_R_AL(UBiDi *pBiDi) { 377 const UChar *text=pBiDi->prologue; 378 int32_t length=pBiDi->proLength; 379 int32_t i; 380 UChar32 uchar; 381 DirProp dirProp, result=ON; 382 for(i=0; i<length; ) { 383 /* i is incremented by U16_NEXT */ 384 U16_NEXT(text, i, length, uchar); 385 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); 386 if(result==ON) { 387 if(dirProp==L || dirProp==R || dirProp==AL) { 388 result=dirProp; 389 } 390 } else { 391 if(dirProp==B) { 392 result=ON; 393 } 394 } 395 } 396 return result; 397} 398 399/* 400 * Check that there are enough entries in the array pointed to by pBiDi->paras 401 */ 402static UBool 403checkParaCount(UBiDi *pBiDi) { 404 int32_t count=pBiDi->paraCount; 405 if(pBiDi->paras==pBiDi->simpleParas) { 406 if(count<=SIMPLE_PARAS_SIZE) 407 return TRUE; 408 if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_SIZE * 2)) 409 return FALSE; 410 pBiDi->paras=pBiDi->parasMemory; 411 uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_SIZE * sizeof(Para)); 412 return TRUE; 413 } 414 if(!getInitialParasMemory(pBiDi, count * 2)) 415 return FALSE; 416 pBiDi->paras=pBiDi->parasMemory; 417 return TRUE; 418} 419 420/* 421 * Get the directional properties for the text, calculate the flags bit-set, and 422 * determine the paragraph level if necessary (in pBiDi->paras[i].level). 423 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI. 424 */ 425static UBool 426getDirProps(UBiDi *pBiDi) { 427 const UChar *text=pBiDi->text; 428 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ 429 430 int32_t i=0, originalLength=pBiDi->originalLength; 431 Flags flags=0; /* collect all directionalities in the text */ 432 UChar32 uchar; 433 DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */ 434 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); 435 /* for inverse BiDi, the default para level is set to RTL if there is a 436 strong R or AL character at either end of the text */ 437 UBool isDefaultLevelInverse=isDefaultLevel && (UBool) 438 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || 439 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); 440 int32_t lastArabicPos=-1; 441 int32_t controlCount=0; 442 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & 443 UBIDI_OPTION_REMOVE_CONTROLS); 444 445 typedef enum { 446 NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */ 447 SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */ 448 SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */ 449 LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */ 450 } State; 451 State state; 452 DirProp lastStrong=ON; /* for default level & inverse BiDi */ 453 /* The following stacks are used to manage isolate sequences. Those 454 sequences may be nested, but obviously never more deeply than the 455 maximum explicit embedding level. 456 lastStack is the index of the last used entry in the stack. A value of -1 457 means that there is no open isolate sequence. 458 lastStack is reset to -1 on paragraph boundaries. */ 459 /* The following stack contains the position of the initiator of 460 each open isolate sequence */ 461 int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; 462 /* The following stack contains the last known state before 463 encountering the initiator of an isolate sequence */ 464 int8_t previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; 465 int32_t stackLast=-1; 466 467 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) 468 pBiDi->length=0; 469 defaultParaLevel=pBiDi->paraLevel&1; 470 if(isDefaultLevel) { 471 pBiDi->paras[0].level=defaultParaLevel; 472 lastStrong=defaultParaLevel; 473 if(pBiDi->proLength>0 && /* there is a prologue */ 474 (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */ 475 if(dirProp==L) 476 pBiDi->paras[0].level=0; /* set the default para level */ 477 else 478 pBiDi->paras[0].level=1; /* set the default para level */ 479 state=NOT_SEEKING_STRONG; 480 } else { 481 state=SEEKING_STRONG_FOR_PARA; 482 } 483 } else { 484 pBiDi->paras[0].level=pBiDi->paraLevel; 485 state=NOT_SEEKING_STRONG; 486 } 487 /* count paragraphs and determine the paragraph level (P2..P3) */ 488 /* 489 * see comment in ubidi.h: 490 * the UBIDI_DEFAULT_XXX values are designed so that 491 * their bit 0 alone yields the intended default 492 */ 493 for( /* i=0 above */ ; i<originalLength; ) { 494 /* i is incremented by U16_NEXT */ 495 U16_NEXT(text, i, originalLength, uchar); 496 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar)); 497 dirProps[i-1]=dirProp; 498 if(uchar>0xffff) { /* set the lead surrogate's property to BN */ 499 flags|=DIRPROP_FLAG(BN); 500 dirProps[i-2]=BN; 501 } 502 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) 503 controlCount++; 504 if(dirProp==L) { 505 if(state==SEEKING_STRONG_FOR_PARA) { 506 pBiDi->paras[pBiDi->paraCount-1].level=0; 507 state=NOT_SEEKING_STRONG; 508 } 509 else if(state==SEEKING_STRONG_FOR_FSI) { 510 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { 511 dirProps[isolateStartStack[stackLast]]=LRI; 512 flags|=DIRPROP_FLAG(LRI); 513 } 514 state=LOOKING_FOR_PDI; 515 } 516 lastStrong=L; 517 continue; 518 } 519 if(dirProp==R || dirProp==AL) { 520 if(state==SEEKING_STRONG_FOR_PARA) { 521 pBiDi->paras[pBiDi->paraCount-1].level=1; 522 state=NOT_SEEKING_STRONG; 523 } 524 else if(state==SEEKING_STRONG_FOR_FSI) { 525 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { 526 dirProps[isolateStartStack[stackLast]]=RLI; 527 flags|=DIRPROP_FLAG(RLI); 528 } 529 state=LOOKING_FOR_PDI; 530 } 531 lastStrong=R; 532 if(dirProp==AL) 533 lastArabicPos=i-1; 534 continue; 535 } 536 if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */ 537 stackLast++; 538 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { 539 isolateStartStack[stackLast]=i-1; 540 previousStateStack[stackLast]=state; 541 } 542 if(dirProp==FSI) 543 state=SEEKING_STRONG_FOR_FSI; 544 else 545 state=LOOKING_FOR_PDI; 546 continue; 547 } 548 if(dirProp==PDI) { 549 if(state==SEEKING_STRONG_FOR_FSI) { 550 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { 551 dirProps[isolateStartStack[stackLast]]=LRI; 552 flags|=DIRPROP_FLAG(LRI); 553 } 554 } 555 if(stackLast>=0) { 556 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) 557 state=previousStateStack[stackLast]; 558 stackLast--; 559 } 560 continue; 561 } 562 if(dirProp==B) { 563 if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */ 564 continue; 565 pBiDi->paras[pBiDi->paraCount-1].limit=i; 566 if(isDefaultLevelInverse && lastStrong==R) 567 pBiDi->paras[pBiDi->paraCount-1].level=1; 568 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 569 /* When streaming, we only process whole paragraphs 570 thus some updates are only done on paragraph boundaries */ 571 pBiDi->length=i; /* i is index to next character */ 572 pBiDi->controlCount=controlCount; 573 } 574 if(i<originalLength) { /* B not last char in text */ 575 pBiDi->paraCount++; 576 if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */ 577 return FALSE; 578 if(isDefaultLevel) { 579 pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel; 580 state=SEEKING_STRONG_FOR_PARA; 581 lastStrong=defaultParaLevel; 582 } else { 583 pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel; 584 state=NOT_SEEKING_STRONG; 585 } 586 stackLast=-1; 587 } 588 continue; 589 } 590 } 591 /* Ignore still open isolate sequences with overflow */ 592 if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) { 593 stackLast=UBIDI_MAX_EXPLICIT_LEVEL; 594 if(dirProps[previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL]]!=FSI) 595 state=LOOKING_FOR_PDI; 596 } 597 /* Resolve direction of still unresolved open FSI sequences */ 598 while(stackLast>=0) { 599 if(state==SEEKING_STRONG_FOR_FSI) { 600 dirProps[isolateStartStack[stackLast]]=LRI; 601 flags|=DIRPROP_FLAG(LRI); 602 } 603 state=previousStateStack[stackLast]; 604 stackLast--; 605 } 606 /* When streaming, ignore text after the last paragraph separator */ 607 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 608 if(pBiDi->length<originalLength) 609 pBiDi->paraCount--; 610 } else { 611 pBiDi->paras[pBiDi->paraCount-1].limit=originalLength; 612 pBiDi->controlCount=controlCount; 613 } 614 /* For inverse bidi, default para direction is RTL if there is 615 a strong R or AL at either end of the paragraph */ 616 if(isDefaultLevelInverse && lastStrong==R) { 617 pBiDi->paras[pBiDi->paraCount-1].level=1; 618 } 619 if(isDefaultLevel) { 620 pBiDi->paraLevel=pBiDi->paras[0].level; 621 } 622 /* The following is needed to resolve the text direction for default level 623 paragraphs containing no strong character */ 624 for(i=0; i<pBiDi->paraCount; i++) 625 flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level); 626 627 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { 628 flags|=DIRPROP_FLAG(L); 629 } 630 pBiDi->flags=flags; 631 pBiDi->lastArabicPos=lastArabicPos; 632 return TRUE; 633} 634 635/* determine the paragraph level at position index */ 636U_CFUNC UBiDiLevel 637ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) { 638 int32_t i; 639 for(i=0; i<pBiDi->paraCount; i++) 640 if(pindex<pBiDi->paras[i].limit) 641 break; 642 if(i>=pBiDi->paraCount) 643 i=pBiDi->paraCount-1; 644 return (UBiDiLevel)(pBiDi->paras[i].level); 645} 646 647/* Functions for handling paired brackets ----------------------------------- */ 648 649/* In the isoRuns array, the first entry is used for text outside of any 650 isolate sequence. Higher entries are used for each more deeply nested 651 isolate sequence. isoRunLast is the index of the last used entry. The 652 openings array is used to note the data of opening brackets not yet 653 matched by a closing bracket, or matched but still susceptible to change 654 level. 655 Each isoRun entry contains the index of the first and 656 one-after-last openings entries for pending opening brackets it 657 contains. The next openings entry to use is the one-after-last of the 658 most deeply nested isoRun entry. 659 isoRun entries also contain their current embedding level and the last 660 encountered strong character, since these will be needed to resolve 661 the level of paired brackets. */ 662 663static void 664bracketInit(UBiDi *pBiDi, BracketData *bd) { 665 bd->pBiDi=pBiDi; 666 bd->isoRunLast=0; 667 bd->isoRuns[0].start=0; 668 bd->isoRuns[0].limit=0; 669 bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0); 670 bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1; 671 bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0; 672 if(pBiDi->openingsMemory) { 673 bd->openings=pBiDi->openingsMemory; 674 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); 675 } else { 676 bd->openings=bd->simpleOpenings; 677 bd->openingsCount=SIMPLE_OPENINGS_SIZE; 678 } 679 bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL || 680 bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL; 681} 682 683/* paragraph boundary */ 684static void 685bracketProcessB(BracketData *bd, UBiDiLevel level) { 686 bd->isoRunLast=0; 687 bd->isoRuns[0].limit=0; 688 bd->isoRuns[0].level=level; 689 bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=level&1; 690 bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0; 691} 692 693/* LRE, LRO, RLE, RLO, PDF */ 694static void 695bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, 696 UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) { 697 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; 698 DirProp *dirProps=bd->pBiDi->dirProps; 699 if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */ 700 return; 701 if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)> 702 (contextLevel&~UBIDI_LEVEL_OVERRIDE)) /* not a PDF */ 703 contextLevel=embeddingLevel; 704 pLastIsoRun->limit=pLastIsoRun->start; 705 pLastIsoRun->level=embeddingLevel; 706 pLastIsoRun->lastStrong=pLastIsoRun->contextDir=contextLevel&1; 707 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=lastCcPos; 708} 709 710/* LRI or RLI */ 711static void 712bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) { 713 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; 714 int16_t lastLimit; 715 lastLimit=pLastIsoRun->limit; 716 bd->isoRunLast++; 717 pLastIsoRun++; 718 pLastIsoRun->start=pLastIsoRun->limit=lastLimit; 719 pLastIsoRun->level=level; 720 pLastIsoRun->lastStrong=pLastIsoRun->contextDir=level&1; 721 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=0; 722} 723 724/* PDI */ 725static void 726bracketProcessPDI(BracketData *bd) { 727 bd->isoRunLast--; 728} 729 730/* newly found opening bracket: create an openings entry */ 731static UBool /* return TRUE if success */ 732bracketAddOpening(BracketData *bd, UChar match, int32_t position) { 733 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; 734 Opening *pOpening; 735 if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */ 736 UBiDi *pBiDi=bd->pBiDi; 737 if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2)) 738 return FALSE; 739 if(bd->openings==bd->simpleOpenings) 740 uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings, 741 SIMPLE_OPENINGS_SIZE * sizeof(Opening)); 742 bd->openings=pBiDi->openingsMemory; /* may have changed */ 743 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); 744 } 745 pOpening=&bd->openings[pLastIsoRun->limit]; 746 pOpening->position=position; 747 pOpening->match=match; 748 pOpening->contextDir=pLastIsoRun->contextDir; 749 pOpening->contextPos=pLastIsoRun->contextPos; 750 pOpening->flags=0; 751 pLastIsoRun->limit++; 752 return TRUE; 753} 754 755/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */ 756static void 757fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) { 758 /* This function calls itself recursively */ 759 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; 760 Opening *qOpening; 761 DirProp *dirProps=bd->pBiDi->dirProps; 762 int32_t k, openingPosition, closingPosition; 763 for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) { 764 if(qOpening->match>=0) /* not an N0c match */ 765 continue; 766 if(newPropPosition<qOpening->contextPos) 767 break; 768 if(newPropPosition>=qOpening->position) 769 continue; 770 if(newProp==qOpening->contextDir) 771 break; 772 openingPosition=qOpening->position; 773 dirProps[openingPosition]=dirProps[newPropPosition]; 774 closingPosition=-(qOpening->match); 775 dirProps[closingPosition]= newProp; /* can never be AL */ 776 qOpening->match=0; /* prevent further changes */ 777 fixN0c(bd, k, openingPosition, newProp); 778 fixN0c(bd, k, closingPosition, newProp); 779 } 780} 781 782/* handle strong characters, digits and candidates for closing brackets */ 783static UBool /* return TRUE if success */ 784bracketProcessChar(BracketData *bd, int32_t position, DirProp dirProp) { 785 IsoRun *pLastIsoRun; 786 Opening *pOpening, *qOpening; 787 DirProp *dirProps, newProp; 788 UBiDiDirection direction; 789 uint16_t flag; 790 int32_t i, k; 791 UBool stable; 792 UChar c, match; 793 dirProps=bd->pBiDi->dirProps; 794 if(DIRPROP_FLAG(dirProp)&MASK_STRONG_EN_AN) { /* L, R, AL, EN or AN */ 795 pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; 796 /* AN after R or AL becomes R or AL; after L or L+AN, it is kept as-is */ 797 if(dirProp==AN && (pLastIsoRun->lastStrong==R || pLastIsoRun->lastStrong==AL)) 798 dirProp=pLastIsoRun->lastStrong; 799 /* EN after L or L+AN becomes L; after R or AL, it becomes R or AL */ 800 if(dirProp==EN) { 801 if(pLastIsoRun->lastStrong==L || pLastIsoRun->lastStrong==AN) { 802 dirProp=L; 803 if(!bd->isNumbersSpecial) 804 dirProps[position]=ENL; 805 } 806 else { 807 dirProp=pLastIsoRun->lastStrong; /* may be R or AL */ 808 if(!bd->isNumbersSpecial) 809 dirProps[position]= dirProp==AL ? AN : ENR; 810 } 811 } 812 pLastIsoRun->lastStrong=dirProp; 813 pLastIsoRun->contextDir=DIR_FROM_STRONG(dirProp); 814 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=position; 815 if(dirProp==AL || dirProp==AN) 816 dirProp=R; 817 flag=DIRPROP_FLAG(dirProp); 818 /* strong characters found after an unmatched opening bracket 819 must be noted for possibly applying N0b */ 820 for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++) 821 bd->openings[i].flags|=flag; 822 return TRUE; 823 } 824 if(dirProp!=ON) 825 return TRUE; 826 /* First see if it is a matching closing bracket. Hopefully, this is more 827 efficient than checking if it is a closing bracket at all */ 828 c=bd->pBiDi->text[position]; 829 pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; 830 for(i=pLastIsoRun->limit-1; i>=pLastIsoRun->start; i--) { 831 if(bd->openings[i].match!=c) 832 continue; 833 /* We have a match */ 834 pOpening=&bd->openings[i]; 835 direction=pLastIsoRun->level&1; 836 stable=TRUE; /* assume stable until proved otherwise */ 837 838 /* The stable flag is set when brackets are paired and their 839 level is resolved and cannot be changed by what will be 840 found later in the source string. 841 An unstable match can occur only when applying N0c, where 842 the resolved level depends on the preceding context, and 843 this context may be affected by text occurring later. 844 Example: RTL paragraph containing: abc[(latin) HEBREW] 845 When the closing parenthesis is encountered, it appears 846 that N0c1 must be applied since 'abc' sets an opposite 847 direction context and both parentheses receive level 2. 848 However, when the closing square bracket is processed, 849 N0b applies because of 'HEBREW' being included within the 850 brackets, thus the square brackets are treated like R and 851 receive level 1. However, this changes the preceding 852 context of the opening parenthesis, and it now appears 853 that N0c2 must be applied to the parentheses rather than 854 N0c1. */ 855 856 if((direction==0 && pOpening->flags&FOUND_L) || 857 (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */ 858 newProp=direction; 859 } 860 else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */ 861 if(direction!=pOpening->contextDir) { 862 newProp=pOpening->contextDir; /* N0c1 */ 863 /* it is stable if there is no preceding text or in 864 conditions too complicated and not worth checking */ 865 stable=(i==pLastIsoRun->start); 866 } 867 else 868 newProp=direction; /* N0c2 */ 869 } 870 else { 871 newProp=BN; /* N0d */ 872 } 873 if(newProp!=BN) { 874 dirProps[pOpening->position]=newProp; 875 dirProps[position]=newProp; 876 pLastIsoRun->contextDir=newProp; 877 pLastIsoRun->contextPos=position; 878 } 879 /* Update nested N0c pairs that may be affected */ 880 if(newProp==direction) 881 fixN0c(bd, i, pOpening->position, newProp); 882 if(stable) { 883 pLastIsoRun->limit=i; /* forget any brackets nested within this pair */ 884 /* remove lower located synonyms if any */ 885 while(pLastIsoRun->limit>pLastIsoRun->start && 886 bd->openings[pLastIsoRun->limit-1].position==pOpening->position) 887 pLastIsoRun->limit--; 888 } 889 else { 890 pOpening->match=-position; 891 /* neutralize lower located synonyms if any */ 892 k=i-1; 893 while(k>=pLastIsoRun->start && 894 bd->openings[k].position==pOpening->position) 895 bd->openings[k--].match=0; 896 /* neutralize any unmatched opening between the current pair; 897 this will also neutralize higher located synonyms if any */ 898 for(k=i+1; k<pLastIsoRun->limit; k++) { 899 qOpening=&bd->openings[k]; 900 if(qOpening->position>=position) 901 break; 902 if(qOpening->match>0) 903 qOpening->match=0; 904 } 905 } 906 return TRUE; 907 } 908 /* We get here only if the ON character was not a matching closing bracket */ 909 /* Now see if it is an opening bracket */ 910 match=u_getBidiPairedBracket(c); /* get the matching char */ 911 if(match==c) /* if no matching char */ 912 return TRUE; 913 if(ubidi_getPairedBracketType(bd->pBiDi->bdp, c)!=U_BPT_OPEN) 914 return TRUE; /* not an opening bracket */ 915 /* special case: process synonyms 916 create an opening entry for each synonym */ 917 if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */ 918 if(!bracketAddOpening(bd, 0x3009, position)) 919 return FALSE; 920 } 921 else if(match==0x3009) { /* RIGHT ANGLE BRACKET */ 922 if(!bracketAddOpening(bd, 0x232A, position)) 923 return FALSE; 924 } 925 return bracketAddOpening(bd, match, position); 926} 927 928/* perform (X1)..(X9) ------------------------------------------------------- */ 929 930/* determine if the text is mixed-directional or single-directional */ 931static UBiDiDirection 932directionFromFlags(UBiDi *pBiDi) { 933 Flags flags=pBiDi->flags; 934 /* if the text contains AN and neutrals, then some neutrals may become RTL */ 935 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) { 936 return UBIDI_LTR; 937 } else if(!(flags&MASK_LTR)) { 938 return UBIDI_RTL; 939 } else { 940 return UBIDI_MIXED; 941 } 942} 943 944/* 945 * Resolve the explicit levels as specified by explicit embedding codes. 946 * Recalculate the flags to have them reflect the real properties 947 * after taking the explicit embeddings into account. 948 * 949 * The BiDi algorithm is designed to result in the same behavior whether embedding 950 * levels are externally specified (from "styled text", supposedly the preferred 951 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text. 952 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN). 953 * However, in a real implementation, the removal of these codes and their index 954 * positions in the plain text is undesirable since it would result in 955 * reallocated, reindexed text. 956 * Instead, this implementation leaves the codes in there and just ignores them 957 * in the subsequent processing. 958 * In order to get the same reordering behavior, positions with a BN or a not-isolate 959 * explicit embedding code just get the same level assigned as the last "real" 960 * character. 961 * 962 * Some implementations, not this one, then overwrite some of these 963 * directionality properties at "real" same-level-run boundaries by 964 * L or R codes so that the resolution of weak types can be performed on the 965 * entire paragraph at once instead of having to parse it once more and 966 * perform that resolution on same-level-runs. 967 * This limits the scope of the implicit rules in effectively 968 * the same way as the run limits. 969 * 970 * Instead, this implementation does not modify these codes, except for 971 * paired brackets whose properties (ON) may be replaced by L or R. 972 * On one hand, the paragraph has to be scanned for same-level-runs, but 973 * on the other hand, this saves another loop to reset these codes, 974 * or saves making and modifying a copy of dirProps[]. 975 * 976 * 977 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. 978 * 979 * 980 * Handling the stack of explicit levels (Xn): 981 * 982 * With the BiDi stack of explicit levels, as pushed with each 983 * LRE, RLE, LRO, RLO, LRI, RLI and FSO and popped with each PDF and PDI, 984 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL. 985 * 986 * In order to have a correct push-pop semantics even in the case of overflows, 987 * overflow counters and a valid isolate counter are used as described in UAX#9 988 * section 3.3.2 "Explicit Levels and Directions". 989 * 990 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. 991 */ 992static UBiDiDirection 993resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { 994 DirProp *dirProps=pBiDi->dirProps; 995 UBiDiLevel *levels=pBiDi->levels; 996 const UChar *text=pBiDi->text; 997 998 int32_t i=0, length=pBiDi->length; 999 Flags flags=pBiDi->flags; /* collect all directionalities in the text */ 1000 DirProp dirProp; 1001 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); 1002 UBiDiDirection direction; 1003 pBiDi->isolateCount=0; 1004 1005 if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; } 1006 1007 /* determine if the text is mixed-directional or single-directional */ 1008 direction=directionFromFlags(pBiDi); 1009 1010 /* we may not need to resolve any explicit levels */ 1011 if((direction!=UBIDI_MIXED)) { 1012 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ 1013 return direction; 1014 } 1015 if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) { 1016 /* inverse BiDi: mixed, but all characters are at the same embedding level */ 1017 /* set all levels to the paragraph level */ 1018 int32_t paraIndex, start, limit; 1019 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) { 1020 if(paraIndex==0) 1021 start=0; 1022 else 1023 start=pBiDi->paras[paraIndex-1].limit; 1024 limit=pBiDi->paras[paraIndex].limit; 1025 level=pBiDi->paras[paraIndex].level; 1026 for(i=start; i<limit; i++) 1027 levels[i]=level; 1028 } 1029 return direction; /* no bracket matching for inverse BiDi */ 1030 } 1031 if(!(flags&(MASK_EXPLICIT|MASK_ISO))) { 1032 /* no embeddings, set all levels to the paragraph level */ 1033 /* we still have to perform bracket matching */ 1034 int32_t paraIndex, start, limit; 1035 BracketData bracketData; 1036 bracketInit(pBiDi, &bracketData); 1037 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) { 1038 if(paraIndex==0) 1039 start=0; 1040 else 1041 start=pBiDi->paras[paraIndex-1].limit; 1042 limit=pBiDi->paras[paraIndex].limit; 1043 level=pBiDi->paras[paraIndex].level; 1044 for(i=start; i<limit; i++) { 1045 levels[i]=level; 1046 dirProp=dirProps[i]; 1047 if(dirProp==B) { 1048 if((i+1)<length) { 1049 if(text[i]==CR && text[i+1]==LF) 1050 continue; /* skip CR when followed by LF */ 1051 bracketProcessB(&bracketData, level); 1052 } 1053 continue; 1054 } 1055 if(!bracketProcessChar(&bracketData, i, dirProp)) { 1056 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1057 return UBIDI_LTR; 1058 } 1059 } 1060 } 1061 return direction; 1062 } 1063 { 1064 /* continue to perform (Xn) */ 1065 1066 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ 1067 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */ 1068 UBiDiLevel embeddingLevel=level, newLevel; 1069 UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */ 1070 int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */ 1071 1072 uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL 1073 but we need one more entry as base */ 1074 uint32_t stackLast=0; 1075 int32_t overflowIsolateCount=0; 1076 int32_t overflowEmbeddingCount=0; 1077 int32_t validIsolateCount=0; 1078 BracketData bracketData; 1079 bracketInit(pBiDi, &bracketData); 1080 stack[0]=level; /* initialize base entry to para level, no override, no isolate */ 1081 1082 /* recalculate the flags */ 1083 flags=0; 1084 1085 for(i=0; i<length; ++i) { 1086 dirProp=dirProps[i]; 1087 switch(dirProp) { 1088 case LRE: 1089 case RLE: 1090 case LRO: 1091 case RLO: 1092 /* (X2, X3, X4, X5) */ 1093 flags|=DIRPROP_FLAG(BN); 1094 if (dirProp==LRE || dirProp==LRO) 1095 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */ 1096 else 1097 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */ 1098 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 && 1099 overflowEmbeddingCount==0) { 1100 lastCcPos=i; 1101 embeddingLevel=newLevel; 1102 if(dirProp==LRO || dirProp==RLO) 1103 embeddingLevel|=UBIDI_LEVEL_OVERRIDE; 1104 stackLast++; 1105 stack[stackLast]=embeddingLevel; 1106 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE 1107 since this has already been done for newLevel which is 1108 the source for embeddingLevel. 1109 */ 1110 } else { 1111 dirProps[i]|=IGNORE_CC; 1112 if(overflowIsolateCount==0) 1113 overflowEmbeddingCount++; 1114 } 1115 break; 1116 case PDF: 1117 /* (X7) */ 1118 flags|=DIRPROP_FLAG(BN); 1119 /* handle all the overflow cases first */ 1120 if(overflowIsolateCount) { 1121 dirProps[i]|=IGNORE_CC; 1122 break; 1123 } 1124 if(overflowEmbeddingCount) { 1125 dirProps[i]|=IGNORE_CC; 1126 overflowEmbeddingCount--; 1127 break; 1128 } 1129 if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */ 1130 lastCcPos=i; 1131 stackLast--; 1132 embeddingLevel=(UBiDiLevel)stack[stackLast]; 1133 } else 1134 dirProps[i]|=IGNORE_CC; 1135 break; 1136 case LRI: 1137 case RLI: 1138 if(embeddingLevel!=previousLevel) { 1139 bracketProcessBoundary(&bracketData, lastCcPos, 1140 previousLevel, embeddingLevel); 1141 previousLevel=embeddingLevel; 1142 } 1143 /* (X5a, X5b) */ 1144 flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel); 1145 level=embeddingLevel; 1146 if(dirProp==LRI) 1147 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */ 1148 else 1149 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */ 1150 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 && 1151 overflowEmbeddingCount==0) { 1152 lastCcPos=i; 1153 previousLevel=embeddingLevel; 1154 validIsolateCount++; 1155 if(validIsolateCount>pBiDi->isolateCount) 1156 pBiDi->isolateCount=validIsolateCount; 1157 embeddingLevel=newLevel; 1158 stackLast++; 1159 stack[stackLast]=embeddingLevel+ISOLATE; 1160 bracketProcessLRI_RLI(&bracketData, embeddingLevel); 1161 } else { 1162 dirProps[i]|=IGNORE_CC; 1163 overflowIsolateCount++; 1164 } 1165 break; 1166 case PDI: 1167 if(embeddingLevel!=previousLevel) { 1168 bracketProcessBoundary(&bracketData, lastCcPos, 1169 previousLevel, embeddingLevel); 1170 } 1171 /* (X6a) */ 1172 if(overflowIsolateCount) { 1173 dirProps[i]|=IGNORE_CC; 1174 overflowIsolateCount--; 1175 } 1176 else if(validIsolateCount) { 1177 lastCcPos=i; 1178 overflowEmbeddingCount=0; 1179 while(stack[stackLast]<ISOLATE) /* pop embedding entries */ 1180 stackLast--; /* until the last isolate entry */ 1181 stackLast--; /* pop also the last isolate entry */ 1182 validIsolateCount--; 1183 bracketProcessPDI(&bracketData); 1184 } else 1185 dirProps[i]|=IGNORE_CC; 1186 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE; 1187 previousLevel=level=embeddingLevel; 1188 flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel); 1189 break; 1190 case B: 1191 level=GET_PARALEVEL(pBiDi, i); 1192 if((i+1)<length) { 1193 if(text[i]==CR && text[i+1]==LF) 1194 break; /* skip CR when followed by LF */ 1195 overflowEmbeddingCount=overflowIsolateCount=0; 1196 validIsolateCount=0; 1197 stackLast=0; 1198 stack[0]=level; /* initialize base entry to para level, no override, no isolate */ 1199 previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1); 1200 bracketProcessB(&bracketData, embeddingLevel); 1201 } 1202 flags|=DIRPROP_FLAG(B); 1203 break; 1204 case BN: 1205 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ 1206 /* they will get their levels set correctly in adjustWSLevels() */ 1207 flags|=DIRPROP_FLAG(BN); 1208 break; 1209 default: 1210 /* all other types get the "real" level */ 1211 level=embeddingLevel; 1212 if(embeddingLevel!=previousLevel) { 1213 bracketProcessBoundary(&bracketData, lastCcPos, 1214 previousLevel, embeddingLevel); 1215 previousLevel=embeddingLevel; 1216 } 1217 if(level&UBIDI_LEVEL_OVERRIDE) 1218 flags|=DIRPROP_FLAG_LR(level); 1219 else 1220 flags|=DIRPROP_FLAG(dirProp); 1221 if(!bracketProcessChar(&bracketData, i, dirProp)) 1222 return -1; 1223 break; 1224 } 1225 1226 /* 1227 * We need to set reasonable levels even on BN codes and 1228 * explicit codes because we will later look at same-level runs (X10). 1229 */ 1230 levels[i]=level; 1231 if(i>0 && levels[i-1]!=level) { 1232 flags|=DIRPROP_FLAG_MULTI_RUNS; 1233 if(level&UBIDI_LEVEL_OVERRIDE) 1234 flags|=DIRPROP_FLAG_O(level); 1235 else 1236 flags|=DIRPROP_FLAG_E(level); 1237 } 1238 if(DIRPROP_FLAG(dirProp)&MASK_ISO) 1239 level=embeddingLevel; 1240 } 1241 if(flags&MASK_EMBEDDING) { 1242 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 1243 } 1244 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { 1245 flags|=DIRPROP_FLAG(L); 1246 } 1247 1248 /* subsequently, ignore the explicit codes and BN (X9) */ 1249 1250 /* again, determine if the text is mixed-directional or single-directional */ 1251 pBiDi->flags=flags; 1252 direction=directionFromFlags(pBiDi); 1253 } 1254 return direction; 1255} 1256 1257/* 1258 * Use a pre-specified embedding levels array: 1259 * 1260 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), 1261 * ignore all explicit codes (X9), 1262 * and check all the preset levels. 1263 * 1264 * Recalculate the flags to have them reflect the real properties 1265 * after taking the explicit embeddings into account. 1266 */ 1267static UBiDiDirection 1268checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { 1269 DirProp *dirProps=pBiDi->dirProps; 1270 DirProp dirProp; 1271 UBiDiLevel *levels=pBiDi->levels; 1272 int32_t isolateCount=0; 1273 1274 int32_t i, length=pBiDi->length; 1275 Flags flags=0; /* collect all directionalities in the text */ 1276 UBiDiLevel level; 1277 pBiDi->isolateCount=0; 1278 1279 for(i=0; i<length; ++i) { 1280 level=levels[i]; 1281 dirProp=dirProps[i]; 1282 if(dirProp==LRI || dirProp==RLI) { 1283 isolateCount++; 1284 if(isolateCount>pBiDi->isolateCount) 1285 pBiDi->isolateCount=isolateCount; 1286 } 1287 else if(dirProp==PDI) 1288 isolateCount--; 1289 else if(dirProp==B) 1290 isolateCount=0; 1291 if(level&UBIDI_LEVEL_OVERRIDE) { 1292 /* keep the override flag in levels[i] but adjust the flags */ 1293 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ 1294 flags|=DIRPROP_FLAG_O(level); 1295 } else { 1296 /* set the flags */ 1297 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); 1298 } 1299 if((level<GET_PARALEVEL(pBiDi, i) && 1300 !((0==level)&&(dirProp==B))) || 1301 (UBIDI_MAX_EXPLICIT_LEVEL<level)) { 1302 /* level out of bounds */ 1303 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1304 return UBIDI_LTR; 1305 } 1306 } 1307 if(flags&MASK_EMBEDDING) { 1308 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 1309 } 1310 1311 /* determine if the text is mixed-directional or single-directional */ 1312 pBiDi->flags=flags; 1313 return directionFromFlags(pBiDi); 1314} 1315 1316/****************************************************************** 1317 The Properties state machine table 1318******************************************************************* 1319 1320 All table cells are 8 bits: 1321 bits 0..4: next state 1322 bits 5..7: action to perform (if > 0) 1323 1324 Cells may be of format "n" where n represents the next state 1325 (except for the rightmost column). 1326 Cells may also be of format "s(x,y)" where x represents an action 1327 to perform and y represents the next state. 1328 1329******************************************************************* 1330 Definitions and type for properties state table 1331******************************************************************* 1332*/ 1333#define IMPTABPROPS_COLUMNS 16 1334#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) 1335#define GET_STATEPROPS(cell) ((cell)&0x1f) 1336#define GET_ACTIONPROPS(cell) ((cell)>>5) 1337#define s(action, newState) ((uint8_t)(newState+(action<<5))) 1338 1339static const uint8_t groupProp[] = /* dirProp regrouped */ 1340{ 1341/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */ 1342 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14 1343}; 1344enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */ 1345 1346/****************************************************************** 1347 1348 PROPERTIES STATE TABLE 1349 1350 In table impTabProps, 1351 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI 1352 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF 1353 - the Res column is the reduced property assigned to a run 1354 1355 Action 1: process current run1, init new run1 1356 2: init new run2 1357 3: process run1, process run2, init new run1 1358 4: process run1, set run1=run2, init new run2 1359 1360 Notes: 1361 1) This table is used in resolveImplicitLevels(). 1362 2) This table triggers actions when there is a change in the Bidi 1363 property of incoming characters (action 1). 1364 3) Most such property sequences are processed immediately (in 1365 fact, passed to processPropertySeq(). 1366 4) However, numbers are assembled as one sequence. This means 1367 that undefined situations (like CS following digits, until 1368 it is known if the next char will be a digit) are held until 1369 following chars define them. 1370 Example: digits followed by CS, then comes another CS or ON; 1371 the digits will be processed, then the CS assigned 1372 as the start of an ON sequence (action 3). 1373 5) There are cases where more than one sequence must be 1374 processed, for instance digits followed by CS followed by L: 1375 the digits must be processed as one sequence, and the CS 1376 must be processed as an ON sequence, all this before starting 1377 assembling chars for the opening L sequence. 1378 1379 1380*/ 1381static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] = 1382{ 1383/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */ 1384/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON }, 1385/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L }, 1386/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R }, 1387/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R }, 1388/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN }, 1389/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN }, 1390/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN }, 1391/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON }, 1392/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON }, 1393/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON }, 1394/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN }, 1395/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN }, 1396/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN }, 1397/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN }, 1398/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON }, 1399/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S }, 1400/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S }, 1401/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B }, 1402/*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L }, 1403/*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L }, 1404/*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L }, 1405/*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN }, 1406/*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN }, 1407/*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN } 1408}; 1409 1410/* we must undef macro s because the levels table have a different 1411 * structure (4 bits for action and 4 bits for next state. 1412 */ 1413#undef s 1414 1415/****************************************************************** 1416 The levels state machine tables 1417******************************************************************* 1418 1419 All table cells are 8 bits: 1420 bits 0..3: next state 1421 bits 4..7: action to perform (if > 0) 1422 1423 Cells may be of format "n" where n represents the next state 1424 (except for the rightmost column). 1425 Cells may also be of format "s(x,y)" where x represents an action 1426 to perform and y represents the next state. 1427 1428 This format limits each table to 16 states each and to 15 actions. 1429 1430******************************************************************* 1431 Definitions and type for levels state tables 1432******************************************************************* 1433*/ 1434#define IMPTABLEVELS_COLUMNS (DirProp_B + 2) 1435#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) 1436#define GET_STATE(cell) ((cell)&0x0f) 1437#define GET_ACTION(cell) ((cell)>>4) 1438#define s(action, newState) ((uint8_t)(newState+(action<<4))) 1439 1440typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; 1441typedef uint8_t ImpAct[]; 1442 1443/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct, 1444 * instead of having a pair of ImpTab and a pair of ImpAct. 1445 */ 1446typedef struct ImpTabPair { 1447 const void * pImpTab[2]; 1448 const void * pImpAct[2]; 1449} ImpTabPair; 1450 1451/****************************************************************** 1452 1453 LEVELS STATE TABLES 1454 1455 In all levels state tables, 1456 - state 0 is the initial state 1457 - the Res column is the increment to add to the text level 1458 for this property sequence. 1459 1460 The impAct arrays for each table of a pair map the local action 1461 numbers of the table to the total list of actions. For instance, 1462 action 2 in a given table corresponds to the action number which 1463 appears in entry [2] of the impAct array for that table. 1464 The first entry of all impAct arrays must be 0. 1465 1466 Action 1: init conditional sequence 1467 2: prepend conditional sequence to current sequence 1468 3: set ON sequence to new level - 1 1469 4: init EN/AN/ON sequence 1470 5: fix EN/AN/ON sequence followed by R 1471 6: set previous level sequence to level 2 1472 1473 Notes: 1474 1) These tables are used in processPropertySeq(). The input 1475 is property sequences as determined by resolveImplicitLevels. 1476 2) Most such property sequences are processed immediately 1477 (levels are assigned). 1478 3) However, some sequences cannot be assigned a final level till 1479 one or more following sequences are received. For instance, 1480 ON following an R sequence within an even-level paragraph. 1481 If the following sequence is R, the ON sequence will be 1482 assigned basic run level+1, and so will the R sequence. 1483 4) S is generally handled like ON, since its level will be fixed 1484 to paragraph level in adjustWSLevels(). 1485 1486*/ 1487 1488static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ 1489/* In this table, conditional sequences receive the higher possible level 1490 until proven otherwise. 1491*/ 1492{ 1493/* L , R , EN , AN , ON , S , B , Res */ 1494/* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 }, 1495/* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 }, 1496/* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 }, 1497/* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 }, 1498/* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 }, 1499/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 } 1500}; 1501static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ 1502/* In this table, conditional sequences receive the lower possible level 1503 until proven otherwise. 1504*/ 1505{ 1506/* L , R , EN , AN , ON , S , B , Res */ 1507/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, 1508/* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 }, 1509/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, 1510/* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 }, 1511/* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 }, 1512/* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 } 1513}; 1514static const ImpAct impAct0 = {0,1,2,3,4,5,6}; 1515static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, 1516 &impTabR_DEFAULT}, 1517 {&impAct0, &impAct0}}; 1518 1519static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */ 1520/* In this table, conditional sequences receive the higher possible level 1521 until proven otherwise. 1522*/ 1523{ 1524/* L , R , EN , AN , ON , S , B , Res */ 1525/* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 }, 1526/* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 }, 1527/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 }, 1528/* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 }, 1529/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 } 1530 }; 1531static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL, 1532 &impTabR_DEFAULT}, 1533 {&impAct0, &impAct0}}; 1534 1535static const ImpTab impTabL_GROUP_NUMBERS_WITH_R = 1536/* In this table, EN/AN+ON sequences receive levels as if associated with R 1537 until proven that there is L or sor/eor on both sides. AN is handled like EN. 1538*/ 1539{ 1540/* L , R , EN , AN , ON , S , B , Res */ 1541/* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, 1542/* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 }, 1543/* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 }, 1544/* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 }, 1545/* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 }, 1546/* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 } 1547}; 1548static const ImpTab impTabR_GROUP_NUMBERS_WITH_R = 1549/* In this table, EN/AN+ON sequences receive levels as if associated with R 1550 until proven that there is L on both sides. AN is handled like EN. 1551*/ 1552{ 1553/* L , R , EN , AN , ON , S , B , Res */ 1554/* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1555/* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, 1556/* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 }, 1557/* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 }, 1558/* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 } 1559}; 1560static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = { 1561 {&impTabL_GROUP_NUMBERS_WITH_R, 1562 &impTabR_GROUP_NUMBERS_WITH_R}, 1563 {&impAct0, &impAct0}}; 1564 1565 1566static const ImpTab impTabL_INVERSE_NUMBERS_AS_L = 1567/* This table is identical to the Default LTR table except that EN and AN are 1568 handled like L. 1569*/ 1570{ 1571/* L , R , EN , AN , ON , S , B , Res */ 1572/* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 }, 1573/* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 }, 1574/* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 }, 1575/* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 }, 1576/* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 }, 1577/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 } 1578}; 1579static const ImpTab impTabR_INVERSE_NUMBERS_AS_L = 1580/* This table is identical to the Default RTL table except that EN and AN are 1581 handled like L. 1582*/ 1583{ 1584/* L , R , EN , AN , ON , S , B , Res */ 1585/* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1586/* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 }, 1587/* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, 1588/* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 }, 1589/* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 }, 1590/* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 } 1591}; 1592static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = { 1593 {&impTabL_INVERSE_NUMBERS_AS_L, 1594 &impTabR_INVERSE_NUMBERS_AS_L}, 1595 {&impAct0, &impAct0}}; 1596 1597static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */ 1598/* In this table, conditional sequences receive the lower possible level 1599 until proven otherwise. 1600*/ 1601{ 1602/* L , R , EN , AN , ON , S , B , Res */ 1603/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, 1604/* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 }, 1605/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, 1606/* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 }, 1607/* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 }, 1608/* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 }, 1609/* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 } 1610}; 1611static const ImpAct impAct1 = {0,1,11,12}; 1612/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc" 1613 */ 1614static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = { 1615 {&impTabL_DEFAULT, 1616 &impTabR_INVERSE_LIKE_DIRECT}, 1617 {&impAct0, &impAct1}}; 1618 1619static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = 1620/* The case handled in this table is (visually): R EN L 1621*/ 1622{ 1623/* L , R , EN , AN , ON , S , B , Res */ 1624/* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 }, 1625/* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 }, 1626/* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 }, 1627/* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 }, 1628/* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 }, 1629/* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 }, 1630/* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 } 1631}; 1632static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = 1633/* The cases handled in this table are (visually): R EN L 1634 R L AN L 1635*/ 1636{ 1637/* L , R , EN , AN , ON , S , B , Res */ 1638/* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 }, 1639/* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 }, 1640/* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 }, 1641/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 }, 1642/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 }, 1643/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 }, 1644/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 } 1645}; 1646static const ImpAct impAct2 = {0,1,7,8,9,10}; 1647static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { 1648 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, 1649 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, 1650 {&impAct0, &impAct2}}; 1651 1652static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { 1653 {&impTabL_NUMBERS_SPECIAL, 1654 &impTabR_INVERSE_LIKE_DIRECT}, 1655 {&impAct0, &impAct1}}; 1656 1657static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = 1658/* The case handled in this table is (visually): R EN L 1659*/ 1660{ 1661/* L , R , EN , AN , ON , S , B , Res */ 1662/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 }, 1663/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 }, 1664/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 }, 1665/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 }, 1666/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 } 1667}; 1668static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { 1669 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, 1670 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, 1671 {&impAct0, &impAct2}}; 1672 1673#undef s 1674 1675typedef struct { 1676 const ImpTab * pImpTab; /* level table pointer */ 1677 const ImpAct * pImpAct; /* action map array */ 1678 int32_t startON; /* start of ON sequence */ 1679 int32_t startL2EN; /* start of level 2 sequence */ 1680 int32_t lastStrongRTL; /* index of last found R or AL */ 1681 int32_t state; /* current state */ 1682 int32_t runStart; /* start position of the run */ 1683 UBiDiLevel runLevel; /* run level before implicit solving */ 1684} LevState; 1685 1686/*------------------------------------------------------------------------*/ 1687 1688static void 1689addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) 1690 /* param pos: position where to insert 1691 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER 1692 */ 1693{ 1694#define FIRSTALLOC 10 1695 Point point; 1696 InsertPoints * pInsertPoints=&(pBiDi->insertPoints); 1697 1698 if (pInsertPoints->capacity == 0) 1699 { 1700 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC); 1701 if (pInsertPoints->points == NULL) 1702 { 1703 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; 1704 return; 1705 } 1706 pInsertPoints->capacity=FIRSTALLOC; 1707 } 1708 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ 1709 { 1710 void * savePoints=pInsertPoints->points; 1711 pInsertPoints->points=uprv_realloc(pInsertPoints->points, 1712 pInsertPoints->capacity*2*sizeof(Point)); 1713 if (pInsertPoints->points == NULL) 1714 { 1715 pInsertPoints->points=savePoints; 1716 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; 1717 return; 1718 } 1719 else pInsertPoints->capacity*=2; 1720 } 1721 point.pos=pos; 1722 point.flag=flag; 1723 pInsertPoints->points[pInsertPoints->size]=point; 1724 pInsertPoints->size++; 1725#undef FIRSTALLOC 1726} 1727 1728/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ 1729 1730/* 1731 * This implementation of the (Wn) rules applies all rules in one pass. 1732 * In order to do so, it needs a look-ahead of typically 1 character 1733 * (except for W5: sequences of ET) and keeps track of changes 1734 * in a rule Wp that affect a later Wq (p<q). 1735 * 1736 * The (Nn) and (In) rules are also performed in that same single loop, 1737 * but effectively one iteration behind for white space. 1738 * 1739 * Since all implicit rules are performed in one step, it is not necessary 1740 * to actually store the intermediate directional properties in dirProps[]. 1741 */ 1742 1743static void 1744processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, 1745 int32_t start, int32_t limit) { 1746 uint8_t cell, oldStateSeq, actionSeq; 1747 const ImpTab * pImpTab=pLevState->pImpTab; 1748 const ImpAct * pImpAct=pLevState->pImpAct; 1749 UBiDiLevel * levels=pBiDi->levels; 1750 UBiDiLevel level, addLevel; 1751 InsertPoints * pInsertPoints; 1752 int32_t start0, k; 1753 1754 start0=start; /* save original start position */ 1755 oldStateSeq=(uint8_t)pLevState->state; 1756 cell=(*pImpTab)[oldStateSeq][_prop]; 1757 pLevState->state=GET_STATE(cell); /* isolate the new state */ 1758 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */ 1759 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; 1760 1761 if(actionSeq) { 1762 switch(actionSeq) { 1763 case 1: /* init ON seq */ 1764 pLevState->startON=start0; 1765 break; 1766 1767 case 2: /* prepend ON seq to current seq */ 1768 start=pLevState->startON; 1769 break; 1770 1771 case 3: /* L or S after possible relevant EN/AN */ 1772 /* check if we had EN after R/AL */ 1773 if (pLevState->startL2EN >= 0) { 1774 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); 1775 } 1776 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */ 1777 /* check if we had any relevant EN/AN after R/AL */ 1778 pInsertPoints=&(pBiDi->insertPoints); 1779 if ((pInsertPoints->capacity == 0) || 1780 (pInsertPoints->size <= pInsertPoints->confirmed)) 1781 { 1782 /* nothing, just clean up */ 1783 pLevState->lastStrongRTL=-1; 1784 /* check if we have a pending conditional segment */ 1785 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES]; 1786 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */ 1787 start=pLevState->startON; /* reset to basic run level */ 1788 } 1789 if (_prop == DirProp_S) /* add LRM before S */ 1790 { 1791 addPoint(pBiDi, start0, LRM_BEFORE); 1792 pInsertPoints->confirmed=pInsertPoints->size; 1793 } 1794 break; 1795 } 1796 /* reset previous RTL cont to level for LTR text */ 1797 for (k=pLevState->lastStrongRTL+1; k<start0; k++) 1798 { 1799 /* reset odd level, leave runLevel+2 as is */ 1800 levels[k]=(levels[k] - 2) & ~1; 1801 } 1802 /* mark insert points as confirmed */ 1803 pInsertPoints->confirmed=pInsertPoints->size; 1804 pLevState->lastStrongRTL=-1; 1805 if (_prop == DirProp_S) /* add LRM before S */ 1806 { 1807 addPoint(pBiDi, start0, LRM_BEFORE); 1808 pInsertPoints->confirmed=pInsertPoints->size; 1809 } 1810 break; 1811 1812 case 4: /* R/AL after possible relevant EN/AN */ 1813 /* just clean up */ 1814 pInsertPoints=&(pBiDi->insertPoints); 1815 if (pInsertPoints->capacity > 0) 1816 /* remove all non confirmed insert points */ 1817 pInsertPoints->size=pInsertPoints->confirmed; 1818 pLevState->startON=-1; 1819 pLevState->startL2EN=-1; 1820 pLevState->lastStrongRTL=limit - 1; 1821 break; 1822 1823 case 5: /* EN/AN after R/AL + possible cont */ 1824 /* check for real AN */ 1825 if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) && 1826 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) 1827 { 1828 /* real AN */ 1829 if (pLevState->startL2EN == -1) /* if no relevant EN already found */ 1830 { 1831 /* just note the righmost digit as a strong RTL */ 1832 pLevState->lastStrongRTL=limit - 1; 1833 break; 1834 } 1835 if (pLevState->startL2EN >= 0) /* after EN, no AN */ 1836 { 1837 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); 1838 pLevState->startL2EN=-2; 1839 } 1840 /* note AN */ 1841 addPoint(pBiDi, start0, LRM_BEFORE); 1842 break; 1843 } 1844 /* if first EN/AN after R/AL */ 1845 if (pLevState->startL2EN == -1) { 1846 pLevState->startL2EN=start0; 1847 } 1848 break; 1849 1850 case 6: /* note location of latest R/AL */ 1851 pLevState->lastStrongRTL=limit - 1; 1852 pLevState->startON=-1; 1853 break; 1854 1855 case 7: /* L after R+ON/EN/AN */ 1856 /* include possible adjacent number on the left */ 1857 for (k=start0-1; k>=0 && !(levels[k]&1); k--); 1858 if(k>=0) { 1859 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */ 1860 pInsertPoints=&(pBiDi->insertPoints); 1861 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */ 1862 } 1863 pLevState->startON=start0; 1864 break; 1865 1866 case 8: /* AN after L */ 1867 /* AN numbers between L text on both sides may be trouble. */ 1868 /* tentatively bracket with LRMs; will be confirmed if followed by L */ 1869 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ 1870 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ 1871 break; 1872 1873 case 9: /* R after L+ON/EN/AN */ 1874 /* false alert, infirm LRMs around previous AN */ 1875 pInsertPoints=&(pBiDi->insertPoints); 1876 pInsertPoints->size=pInsertPoints->confirmed; 1877 if (_prop == DirProp_S) /* add RLM before S */ 1878 { 1879 addPoint(pBiDi, start0, RLM_BEFORE); 1880 pInsertPoints->confirmed=pInsertPoints->size; 1881 } 1882 break; 1883 1884 case 10: /* L after L+ON/AN */ 1885 level=pLevState->runLevel + addLevel; 1886 for(k=pLevState->startON; k<start0; k++) { 1887 if (levels[k]<level) 1888 levels[k]=level; 1889 } 1890 pInsertPoints=&(pBiDi->insertPoints); 1891 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */ 1892 pLevState->startON=start0; 1893 break; 1894 1895 case 11: /* L after L+ON+EN/AN/ON */ 1896 level=pLevState->runLevel; 1897 for(k=start0-1; k>=pLevState->startON; k--) { 1898 if(levels[k]==level+3) { 1899 while(levels[k]==level+3) { 1900 levels[k--]-=2; 1901 } 1902 while(levels[k]==level) { 1903 k--; 1904 } 1905 } 1906 if(levels[k]==level+2) { 1907 levels[k]=level; 1908 continue; 1909 } 1910 levels[k]=level+1; 1911 } 1912 break; 1913 1914 case 12: /* R after L+ON+EN/AN/ON */ 1915 level=pLevState->runLevel+1; 1916 for(k=start0-1; k>=pLevState->startON; k--) { 1917 if(levels[k]>level) { 1918 levels[k]-=2; 1919 } 1920 } 1921 break; 1922 1923 default: /* we should never get here */ 1924 U_ASSERT(FALSE); 1925 break; 1926 } 1927 } 1928 if((addLevel) || (start < start0)) { 1929 level=pLevState->runLevel + addLevel; 1930 if(start>=pLevState->runStart) { 1931 for(k=start; k<limit; k++) { 1932 levels[k]=level; 1933 } 1934 } else { 1935 DirProp *dirProps=pBiDi->dirProps, dirProp; 1936 int32_t isolateCount=0; 1937 for(k=start; k<limit; k++) { 1938 dirProp=dirProps[k]; 1939 if(dirProp==PDI) 1940 isolateCount--; 1941 if(isolateCount==0) 1942 levels[k]=level; 1943 if(dirProp==LRI || dirProp==RLI) 1944 isolateCount++; 1945 } 1946 } 1947 } 1948} 1949 1950/** 1951 * Returns the directionality of the last strong character at the end of the prologue, if any. 1952 * Requires prologue!=null. 1953 */ 1954static DirProp 1955lastL_R_AL(UBiDi *pBiDi) { 1956 const UChar *text=pBiDi->prologue; 1957 int32_t length=pBiDi->proLength; 1958 int32_t i; 1959 UChar32 uchar; 1960 DirProp dirProp; 1961 for(i=length; i>0; ) { 1962 /* i is decremented by U16_PREV */ 1963 U16_PREV(text, 0, i, uchar); 1964 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); 1965 if(dirProp==L) { 1966 return DirProp_L; 1967 } 1968 if(dirProp==R || dirProp==AL) { 1969 return DirProp_R; 1970 } 1971 if(dirProp==B) { 1972 return DirProp_ON; 1973 } 1974 } 1975 return DirProp_ON; 1976} 1977 1978/** 1979 * Returns the directionality of the first strong character, or digit, in the epilogue, if any. 1980 * Requires epilogue!=null. 1981 */ 1982static DirProp 1983firstL_R_AL_EN_AN(UBiDi *pBiDi) { 1984 const UChar *text=pBiDi->epilogue; 1985 int32_t length=pBiDi->epiLength; 1986 int32_t i; 1987 UChar32 uchar; 1988 DirProp dirProp; 1989 for(i=0; i<length; ) { 1990 /* i is incremented by U16_NEXT */ 1991 U16_NEXT(text, i, length, uchar); 1992 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); 1993 if(dirProp==L) { 1994 return DirProp_L; 1995 } 1996 if(dirProp==R || dirProp==AL) { 1997 return DirProp_R; 1998 } 1999 if(dirProp==EN) { 2000 return DirProp_EN; 2001 } 2002 if(dirProp==AN) { 2003 return DirProp_AN; 2004 } 2005 } 2006 return DirProp_ON; 2007} 2008 2009static void 2010resolveImplicitLevels(UBiDi *pBiDi, 2011 int32_t start, int32_t limit, 2012 DirProp sor, DirProp eor) { 2013 const DirProp *dirProps=pBiDi->dirProps; 2014 DirProp dirProp; 2015 LevState levState; 2016 int32_t i, start1, start2; 2017 uint16_t oldStateImp, stateImp, actionImp; 2018 uint8_t gprop, resProp, cell; 2019 UBool inverseRTL; 2020 DirProp nextStrongProp=R; 2021 int32_t nextStrongPos=-1; 2022 2023 /* check for RTL inverse BiDi mode */ 2024 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to 2025 * loop on the text characters from end to start. 2026 * This would need a different properties state table (at least different 2027 * actions) and different levels state tables (maybe very similar to the 2028 * LTR corresponding ones. 2029 */ 2030 inverseRTL=(UBool) 2031 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && 2032 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || 2033 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); 2034 2035 /* initialize for property and levels state tables */ 2036 levState.startON=-1; 2037 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 2038 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ 2039 levState.runStart=start; 2040 levState.runLevel=pBiDi->levels[start]; 2041 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; 2042 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; 2043 if(start==0 && pBiDi->proLength>0) { 2044 DirProp lastStrong=lastL_R_AL(pBiDi); 2045 if(lastStrong!=DirProp_ON) { 2046 sor=lastStrong; 2047 } 2048 } 2049 /* The isolates[] entries contain enough information to 2050 resume the bidi algorithm in the same state as it was 2051 when it was interrupted by an isolate sequence. */ 2052 if(dirProps[start]==PDI) { 2053 start1=pBiDi->isolates[pBiDi->isolateCount].start1; 2054 stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp; 2055 levState.state=pBiDi->isolates[pBiDi->isolateCount].state; 2056 pBiDi->isolateCount--; 2057 } else { 2058 start1=start; 2059 if(dirProps[start]==NSM) 2060 stateImp = 1 + sor; 2061 else 2062 stateImp=0; 2063 levState.state=0; 2064 processPropertySeq(pBiDi, &levState, sor, start, start); 2065 } 2066 start2=start; 2067 2068 for(i=start; i<=limit; i++) { 2069 if(i>=limit) { 2070 if(limit>start) { 2071 dirProp=pBiDi->dirProps[limit-1]; 2072 if(dirProp==LRI || dirProp==RLI) 2073 break; /* no forced closing for sequence ending with LRI/RLI */ 2074 } 2075 gprop=eor; 2076 } else { 2077 DirProp prop, prop1; 2078 prop=PURE_DIRPROP(dirProps[i]); 2079 if(inverseRTL) { 2080 if(prop==AL) { 2081 /* AL before EN does not make it AN */ 2082 prop=R; 2083 } else if(prop==EN) { 2084 if(nextStrongPos<=i) { 2085 /* look for next strong char (L/R/AL) */ 2086 int32_t j; 2087 nextStrongProp=R; /* set default */ 2088 nextStrongPos=limit; 2089 for(j=i+1; j<limit; j++) { 2090 prop1=dirProps[j]; 2091 if(prop1==L || prop1==R || prop1==AL) { 2092 nextStrongProp=prop1; 2093 nextStrongPos=j; 2094 break; 2095 } 2096 } 2097 } 2098 if(nextStrongProp==AL) { 2099 prop=AN; 2100 } 2101 } 2102 } 2103 gprop=groupProp[prop]; 2104 } 2105 oldStateImp=stateImp; 2106 cell=impTabProps[oldStateImp][gprop]; 2107 stateImp=GET_STATEPROPS(cell); /* isolate the new state */ 2108 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */ 2109 if((i==limit) && (actionImp==0)) { 2110 /* there is an unprocessed sequence if its property == eor */ 2111 actionImp=1; /* process the last sequence */ 2112 } 2113 if(actionImp) { 2114 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES]; 2115 switch(actionImp) { 2116 case 1: /* process current seq1, init new seq1 */ 2117 processPropertySeq(pBiDi, &levState, resProp, start1, i); 2118 start1=i; 2119 break; 2120 case 2: /* init new seq2 */ 2121 start2=i; 2122 break; 2123 case 3: /* process seq1, process seq2, init new seq1 */ 2124 processPropertySeq(pBiDi, &levState, resProp, start1, start2); 2125 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i); 2126 start1=i; 2127 break; 2128 case 4: /* process seq1, set seq1=seq2, init new seq2 */ 2129 processPropertySeq(pBiDi, &levState, resProp, start1, start2); 2130 start1=start2; 2131 start2=i; 2132 break; 2133 default: /* we should never get here */ 2134 U_ASSERT(FALSE); 2135 break; 2136 } 2137 } 2138 } 2139 2140 /* flush possible pending sequence, e.g. ON */ 2141 if(limit==pBiDi->length && pBiDi->epiLength>0) { 2142 DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi); 2143 if(firstStrong!=DirProp_ON) { 2144 eor=firstStrong; 2145 } 2146 } 2147 2148 dirProp=dirProps[limit-1]; 2149 if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) { 2150 pBiDi->isolateCount++; 2151 pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp; 2152 pBiDi->isolates[pBiDi->isolateCount].state=levState.state; 2153 pBiDi->isolates[pBiDi->isolateCount].start1=start1; 2154 } 2155 else 2156 processPropertySeq(pBiDi, &levState, eor, limit, limit); 2157} 2158 2159/* perform (L1) and (X9) ---------------------------------------------------- */ 2160 2161/* 2162 * Reset the embedding levels for some non-graphic characters (L1). 2163 * This function also sets appropriate levels for BN, and 2164 * explicit embedding types that are supposed to have been removed 2165 * from the paragraph in (X9). 2166 */ 2167static void 2168adjustWSLevels(UBiDi *pBiDi) { 2169 const DirProp *dirProps=pBiDi->dirProps; 2170 UBiDiLevel *levels=pBiDi->levels; 2171 int32_t i; 2172 2173 if(pBiDi->flags&MASK_WS) { 2174 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; 2175 Flags flag; 2176 2177 i=pBiDi->trailingWSStart; 2178 while(i>0) { 2179 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ 2180 while(i>0 && (flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])))&MASK_WS) { 2181 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { 2182 levels[i]=0; 2183 } else { 2184 levels[i]=GET_PARALEVEL(pBiDi, i); 2185 } 2186 } 2187 2188 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ 2189 /* here, i+1 is guaranteed to be <length */ 2190 while(i>0) { 2191 flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])); 2192 if(flag&MASK_BN_EXPLICIT) { 2193 levels[i]=levels[i+1]; 2194 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { 2195 levels[i]=0; 2196 break; 2197 } else if(flag&MASK_B_S) { 2198 levels[i]=GET_PARALEVEL(pBiDi, i); 2199 break; 2200 } 2201 } 2202 } 2203 } 2204} 2205 2206U_CAPI void U_EXPORT2 2207ubidi_setContext(UBiDi *pBiDi, 2208 const UChar *prologue, int32_t proLength, 2209 const UChar *epilogue, int32_t epiLength, 2210 UErrorCode *pErrorCode) { 2211 /* check the argument values */ 2212 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 2213 if(pBiDi==NULL || proLength<-1 || epiLength<-1 || 2214 (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) { 2215 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2216 return; 2217 } 2218 2219 if(proLength==-1) { 2220 pBiDi->proLength=u_strlen(prologue); 2221 } else { 2222 pBiDi->proLength=proLength; 2223 } 2224 if(epiLength==-1) { 2225 pBiDi->epiLength=u_strlen(epilogue); 2226 } else { 2227 pBiDi->epiLength=epiLength; 2228 } 2229 pBiDi->prologue=prologue; 2230 pBiDi->epilogue=epilogue; 2231} 2232 2233static void 2234setParaSuccess(UBiDi *pBiDi) { 2235 pBiDi->proLength=0; /* forget the last context */ 2236 pBiDi->epiLength=0; 2237 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ 2238} 2239 2240#define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) 2241#define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x))) 2242 2243static void 2244setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, 2245 UBiDiLevel paraLevel, UErrorCode *pErrorCode) { 2246 void *runsOnlyMemory; 2247 int32_t *visualMap; 2248 UChar *visualText; 2249 int32_t saveLength, saveTrailingWSStart; 2250 const UBiDiLevel *levels; 2251 UBiDiLevel *saveLevels; 2252 UBiDiDirection saveDirection; 2253 UBool saveMayAllocateText; 2254 Run *runs; 2255 int32_t visualLength, i, j, visualStart, logicalStart, 2256 runCount, runLength, addedRuns, insertRemove, 2257 start, limit, step, indexOddBit, logicalPos, 2258 index0, index1; 2259 uint32_t saveOptions; 2260 2261 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; 2262 if(length==0) { 2263 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); 2264 goto cleanup3; 2265 } 2266 /* obtain memory for mapping table and visual text */ 2267 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))); 2268 if(runsOnlyMemory==NULL) { 2269 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 2270 goto cleanup3; 2271 } 2272 visualMap=runsOnlyMemory; 2273 visualText=(UChar *)&visualMap[length]; 2274 saveLevels=(UBiDiLevel *)&visualText[length]; 2275 saveOptions=pBiDi->reorderingOptions; 2276 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { 2277 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; 2278 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; 2279 } 2280 paraLevel&=1; /* accept only 0 or 1 */ 2281 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); 2282 if(U_FAILURE(*pErrorCode)) { 2283 goto cleanup3; 2284 } 2285 /* we cannot access directly pBiDi->levels since it is not yet set if 2286 * direction is not MIXED 2287 */ 2288 levels=ubidi_getLevels(pBiDi, pErrorCode); 2289 uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel)); 2290 saveTrailingWSStart=pBiDi->trailingWSStart; 2291 saveLength=pBiDi->length; 2292 saveDirection=pBiDi->direction; 2293 2294 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use 2295 * the visual map and the dirProps array to drive the second call 2296 * to ubidi_setPara (but must make provision for possible removal of 2297 * BiDi controls. Alternatively, only use the dirProps array via 2298 * customized classifier callback. 2299 */ 2300 visualLength=ubidi_writeReordered(pBiDi, visualText, length, 2301 UBIDI_DO_MIRRORING, pErrorCode); 2302 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); 2303 if(U_FAILURE(*pErrorCode)) { 2304 goto cleanup2; 2305 } 2306 pBiDi->reorderingOptions=saveOptions; 2307 2308 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; 2309 paraLevel^=1; 2310 /* Because what we did with reorderingOptions, visualText may be shorter 2311 * than the original text. But we don't want the levels memory to be 2312 * reallocated shorter than the original length, since we need to restore 2313 * the levels as after the first call to ubidi_setpara() before returning. 2314 * We will force mayAllocateText to FALSE before the second call to 2315 * ubidi_setpara(), and will restore it afterwards. 2316 */ 2317 saveMayAllocateText=pBiDi->mayAllocateText; 2318 pBiDi->mayAllocateText=FALSE; 2319 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); 2320 pBiDi->mayAllocateText=saveMayAllocateText; 2321 ubidi_getRuns(pBiDi, pErrorCode); 2322 if(U_FAILURE(*pErrorCode)) { 2323 goto cleanup1; 2324 } 2325 /* check if some runs must be split, count how many splits */ 2326 addedRuns=0; 2327 runCount=pBiDi->runCount; 2328 runs=pBiDi->runs; 2329 visualStart=0; 2330 for(i=0; i<runCount; i++, visualStart+=runLength) { 2331 runLength=runs[i].visualLimit-visualStart; 2332 if(runLength<2) { 2333 continue; 2334 } 2335 logicalStart=GET_INDEX(runs[i].logicalStart); 2336 for(j=logicalStart+1; j<logicalStart+runLength; j++) { 2337 index0=visualMap[j]; 2338 index1=visualMap[j-1]; 2339 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { 2340 addedRuns++; 2341 } 2342 } 2343 } 2344 if(addedRuns) { 2345 if(getRunsMemory(pBiDi, runCount+addedRuns)) { 2346 if(runCount==1) { 2347 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */ 2348 pBiDi->runsMemory[0]=runs[0]; 2349 } 2350 runs=pBiDi->runs=pBiDi->runsMemory; 2351 pBiDi->runCount+=addedRuns; 2352 } else { 2353 goto cleanup1; 2354 } 2355 } 2356 /* split runs which are not consecutive in source text */ 2357 for(i=runCount-1; i>=0; i--) { 2358 runLength= i==0 ? runs[0].visualLimit : 2359 runs[i].visualLimit-runs[i-1].visualLimit; 2360 logicalStart=runs[i].logicalStart; 2361 indexOddBit=GET_ODD_BIT(logicalStart); 2362 logicalStart=GET_INDEX(logicalStart); 2363 if(runLength<2) { 2364 if(addedRuns) { 2365 runs[i+addedRuns]=runs[i]; 2366 } 2367 logicalPos=visualMap[logicalStart]; 2368 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 2369 saveLevels[logicalPos]^indexOddBit); 2370 continue; 2371 } 2372 if(indexOddBit) { 2373 start=logicalStart; 2374 limit=logicalStart+runLength-1; 2375 step=1; 2376 } else { 2377 start=logicalStart+runLength-1; 2378 limit=logicalStart; 2379 step=-1; 2380 } 2381 for(j=start; j!=limit; j+=step) { 2382 index0=visualMap[j]; 2383 index1=visualMap[j+step]; 2384 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { 2385 logicalPos=BIDI_MIN(visualMap[start], index0); 2386 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 2387 saveLevels[logicalPos]^indexOddBit); 2388 runs[i+addedRuns].visualLimit=runs[i].visualLimit; 2389 runs[i].visualLimit-=BIDI_ABS(j-start)+1; 2390 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); 2391 runs[i+addedRuns].insertRemove=insertRemove; 2392 runs[i].insertRemove&=~insertRemove; 2393 start=j+step; 2394 addedRuns--; 2395 } 2396 } 2397 if(addedRuns) { 2398 runs[i+addedRuns]=runs[i]; 2399 } 2400 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); 2401 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 2402 saveLevels[logicalPos]^indexOddBit); 2403 } 2404 2405 cleanup1: 2406 /* restore initial paraLevel */ 2407 pBiDi->paraLevel^=1; 2408 cleanup2: 2409 /* restore real text */ 2410 pBiDi->text=text; 2411 pBiDi->length=saveLength; 2412 pBiDi->originalLength=length; 2413 pBiDi->direction=saveDirection; 2414 /* the saved levels should never excess levelsSize, but we check anyway */ 2415 if(saveLength>pBiDi->levelsSize) { 2416 saveLength=pBiDi->levelsSize; 2417 } 2418 uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel)); 2419 pBiDi->trailingWSStart=saveTrailingWSStart; 2420 /* free memory for mapping table and visual text */ 2421 uprv_free(runsOnlyMemory); 2422 if(pBiDi->runCount>1) { 2423 pBiDi->direction=UBIDI_MIXED; 2424 } 2425 cleanup3: 2426 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; 2427} 2428 2429/* ubidi_setPara ------------------------------------------------------------ */ 2430 2431U_CAPI void U_EXPORT2 2432ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, 2433 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, 2434 UErrorCode *pErrorCode) { 2435 UBiDiDirection direction; 2436 2437 /* check the argument values */ 2438 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 2439 if(pBiDi==NULL || text==NULL || length<-1 || 2440 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) { 2441 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2442 return; 2443 } 2444 2445 if(length==-1) { 2446 length=u_strlen(text); 2447 } 2448 2449 /* special treatment for RUNS_ONLY mode */ 2450 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { 2451 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); 2452 return; 2453 } 2454 2455 /* initialize the UBiDi structure */ 2456 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ 2457 pBiDi->text=text; 2458 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; 2459 pBiDi->paraLevel=paraLevel; 2460 pBiDi->direction=paraLevel&1; 2461 pBiDi->paraCount=1; 2462 2463 pBiDi->dirProps=NULL; 2464 pBiDi->levels=NULL; 2465 pBiDi->runs=NULL; 2466 pBiDi->insertPoints.size=0; /* clean up from last call */ 2467 pBiDi->insertPoints.confirmed=0; /* clean up from last call */ 2468 2469 /* 2470 * Save the original paraLevel if contextual; otherwise, set to 0. 2471 */ 2472 pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel); 2473 2474 if(length==0) { 2475 /* 2476 * For an empty paragraph, create a UBiDi object with the paraLevel and 2477 * the flags and the direction set but without allocating zero-length arrays. 2478 * There is nothing more to do. 2479 */ 2480 if(IS_DEFAULT_LEVEL(paraLevel)) { 2481 pBiDi->paraLevel&=1; 2482 pBiDi->defaultParaLevel=0; 2483 } 2484 pBiDi->flags=DIRPROP_FLAG_LR(paraLevel); 2485 pBiDi->runCount=0; 2486 pBiDi->paraCount=0; 2487 setParaSuccess(pBiDi); /* mark successful setPara */ 2488 return; 2489 } 2490 2491 pBiDi->runCount=-1; 2492 2493 /* allocate paras memory */ 2494 if(pBiDi->parasMemory) 2495 pBiDi->paras=pBiDi->parasMemory; 2496 else 2497 pBiDi->paras=pBiDi->simpleParas; 2498 2499 /* 2500 * Get the directional properties, 2501 * the flags bit-set, and 2502 * determine the paragraph level if necessary. 2503 */ 2504 if(getDirPropsMemory(pBiDi, length)) { 2505 pBiDi->dirProps=pBiDi->dirPropsMemory; 2506 if(!getDirProps(pBiDi)) { 2507 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 2508 return; 2509 } 2510 } else { 2511 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 2512 return; 2513 } 2514 /* the processed length may have changed if UBIDI_OPTION_STREAMING */ 2515 length= pBiDi->length; 2516 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ 2517 2518 /* are explicit levels specified? */ 2519 if(embeddingLevels==NULL) { 2520 /* no: determine explicit levels according to the (Xn) rules */\ 2521 if(getLevelsMemory(pBiDi, length)) { 2522 pBiDi->levels=pBiDi->levelsMemory; 2523 direction=resolveExplicitLevels(pBiDi, pErrorCode); 2524 if(U_FAILURE(*pErrorCode)) { 2525 return; 2526 } 2527 } else { 2528 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 2529 return; 2530 } 2531 } else { 2532 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ 2533 pBiDi->levels=embeddingLevels; 2534 direction=checkExplicitLevels(pBiDi, pErrorCode); 2535 if(U_FAILURE(*pErrorCode)) { 2536 return; 2537 } 2538 } 2539 2540 /* allocate isolate memory */ 2541 if(pBiDi->isolateCount<=SIMPLE_ISOLATES_SIZE) 2542 pBiDi->isolates=pBiDi->simpleIsolates; 2543 else 2544 if(pBiDi->isolateCount<=pBiDi->isolatesSize) 2545 pBiDi->isolates=pBiDi->isolatesMemory; 2546 else { 2547 if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) { 2548 pBiDi->isolates=pBiDi->isolatesMemory; 2549 } else { 2550 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 2551 return; 2552 } 2553 } 2554 pBiDi->isolateCount=-1; /* current isolates stack entry == none */ 2555 2556 /* 2557 * The steps after (X9) in the UBiDi algorithm are performed only if 2558 * the paragraph text has mixed directionality! 2559 */ 2560 pBiDi->direction=direction; 2561 switch(direction) { 2562 case UBIDI_LTR: 2563 /* make sure paraLevel is even */ 2564 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1); 2565 2566 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ 2567 pBiDi->trailingWSStart=0; 2568 break; 2569 case UBIDI_RTL: 2570 /* make sure paraLevel is odd */ 2571 pBiDi->paraLevel|=1; 2572 2573 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ 2574 pBiDi->trailingWSStart=0; 2575 break; 2576 default: 2577 /* 2578 * Choose the right implicit state table 2579 */ 2580 switch(pBiDi->reorderingMode) { 2581 case UBIDI_REORDER_DEFAULT: 2582 pBiDi->pImpTabPair=&impTab_DEFAULT; 2583 break; 2584 case UBIDI_REORDER_NUMBERS_SPECIAL: 2585 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; 2586 break; 2587 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: 2588 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; 2589 break; 2590 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: 2591 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; 2592 break; 2593 case UBIDI_REORDER_INVERSE_LIKE_DIRECT: 2594 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { 2595 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; 2596 } else { 2597 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; 2598 } 2599 break; 2600 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: 2601 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { 2602 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; 2603 } else { 2604 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; 2605 } 2606 break; 2607 default: 2608 /* we should never get here */ 2609 U_ASSERT(FALSE); 2610 break; 2611 } 2612 /* 2613 * If there are no external levels specified and there 2614 * are no significant explicit level codes in the text, 2615 * then we can treat the entire paragraph as one run. 2616 * Otherwise, we need to perform the following rules on runs of 2617 * the text with the same embedding levels. (X10) 2618 * "Significant" explicit level codes are ones that actually 2619 * affect non-BN characters. 2620 * Examples for "insignificant" ones are empty embeddings 2621 * LRE-PDF, LRE-RLE-PDF-PDF, etc. 2622 */ 2623 if(embeddingLevels==NULL && pBiDi->paraCount<=1 && 2624 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { 2625 resolveImplicitLevels(pBiDi, 0, length, 2626 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), 2627 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); 2628 } else { 2629 /* sor, eor: start and end types of same-level-run */ 2630 UBiDiLevel *levels=pBiDi->levels; 2631 int32_t start, limit=0; 2632 UBiDiLevel level, nextLevel; 2633 DirProp sor, eor; 2634 2635 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ 2636 level=GET_PARALEVEL(pBiDi, 0); 2637 nextLevel=levels[0]; 2638 if(level<nextLevel) { 2639 eor=GET_LR_FROM_LEVEL(nextLevel); 2640 } else { 2641 eor=GET_LR_FROM_LEVEL(level); 2642 } 2643 2644 do { 2645 /* determine start and limit of the run (end points just behind the run) */ 2646 2647 /* the values for this run's start are the same as for the previous run's end */ 2648 start=limit; 2649 level=nextLevel; 2650 if((start>0) && (pBiDi->dirProps[start-1]==B)) { 2651 /* except if this is a new paragraph, then set sor = para level */ 2652 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); 2653 } else { 2654 sor=eor; 2655 } 2656 2657 /* search for the limit of this run */ 2658 while(++limit<length && levels[limit]==level) {} 2659 2660 /* get the correct level of the next run */ 2661 if(limit<length) { 2662 nextLevel=levels[limit]; 2663 } else { 2664 nextLevel=GET_PARALEVEL(pBiDi, length-1); 2665 } 2666 2667 /* determine eor from max(level, nextLevel); sor is last run's eor */ 2668 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) { 2669 eor=GET_LR_FROM_LEVEL(nextLevel); 2670 } else { 2671 eor=GET_LR_FROM_LEVEL(level); 2672 } 2673 2674 /* if the run consists of overridden directional types, then there 2675 are no implicit types to be resolved */ 2676 if(!(level&UBIDI_LEVEL_OVERRIDE)) { 2677 resolveImplicitLevels(pBiDi, start, limit, sor, eor); 2678 } else { 2679 /* remove the UBIDI_LEVEL_OVERRIDE flags */ 2680 do { 2681 levels[start++]&=~UBIDI_LEVEL_OVERRIDE; 2682 } while(start<limit); 2683 } 2684 } while(limit<length); 2685 } 2686 /* check if we got any memory shortage while adding insert points */ 2687 if (U_FAILURE(pBiDi->insertPoints.errorCode)) 2688 { 2689 *pErrorCode=pBiDi->insertPoints.errorCode; 2690 return; 2691 } 2692 /* reset the embedding levels for some non-graphic characters (L1), (X9) */ 2693 adjustWSLevels(pBiDi); 2694 break; 2695 } 2696 /* add RLM for inverse Bidi with contextual orientation resolving 2697 * to RTL which would not round-trip otherwise 2698 */ 2699 if((pBiDi->defaultParaLevel>0) && 2700 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && 2701 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || 2702 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { 2703 int32_t i, j, start, last; 2704 UBiDiLevel level; 2705 DirProp dirProp; 2706 for(i=0; i<pBiDi->paraCount; i++) { 2707 last=(pBiDi->paras[i].limit)-1; 2708 level=pBiDi->paras[i].level; 2709 if(level==0) 2710 continue; /* LTR paragraph */ 2711 start= i==0 ? 0 : pBiDi->paras[i-1].limit; 2712 for(j=last; j>=start; j--) { 2713 dirProp=pBiDi->dirProps[j]; 2714 if(dirProp==L) { 2715 if(j<last) { 2716 while(pBiDi->dirProps[last]==B) { 2717 last--; 2718 } 2719 } 2720 addPoint(pBiDi, last, RLM_BEFORE); 2721 break; 2722 } 2723 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) { 2724 break; 2725 } 2726 } 2727 } 2728 } 2729 2730 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { 2731 pBiDi->resultLength -= pBiDi->controlCount; 2732 } else { 2733 pBiDi->resultLength += pBiDi->insertPoints.size; 2734 } 2735 setParaSuccess(pBiDi); /* mark successful setPara */ 2736} 2737 2738U_CAPI void U_EXPORT2 2739ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { 2740 if(pBiDi!=NULL) { 2741 pBiDi->orderParagraphsLTR=orderParagraphsLTR; 2742 } 2743} 2744 2745U_CAPI UBool U_EXPORT2 2746ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { 2747 if(pBiDi!=NULL) { 2748 return pBiDi->orderParagraphsLTR; 2749 } else { 2750 return FALSE; 2751 } 2752} 2753 2754U_CAPI UBiDiDirection U_EXPORT2 2755ubidi_getDirection(const UBiDi *pBiDi) { 2756 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2757 return pBiDi->direction; 2758 } else { 2759 return UBIDI_LTR; 2760 } 2761} 2762 2763U_CAPI const UChar * U_EXPORT2 2764ubidi_getText(const UBiDi *pBiDi) { 2765 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2766 return pBiDi->text; 2767 } else { 2768 return NULL; 2769 } 2770} 2771 2772U_CAPI int32_t U_EXPORT2 2773ubidi_getLength(const UBiDi *pBiDi) { 2774 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2775 return pBiDi->originalLength; 2776 } else { 2777 return 0; 2778 } 2779} 2780 2781U_CAPI int32_t U_EXPORT2 2782ubidi_getProcessedLength(const UBiDi *pBiDi) { 2783 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2784 return pBiDi->length; 2785 } else { 2786 return 0; 2787 } 2788} 2789 2790U_CAPI int32_t U_EXPORT2 2791ubidi_getResultLength(const UBiDi *pBiDi) { 2792 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2793 return pBiDi->resultLength; 2794 } else { 2795 return 0; 2796 } 2797} 2798 2799/* paragraphs API functions ------------------------------------------------- */ 2800 2801U_CAPI UBiDiLevel U_EXPORT2 2802ubidi_getParaLevel(const UBiDi *pBiDi) { 2803 if(IS_VALID_PARA_OR_LINE(pBiDi)) { 2804 return pBiDi->paraLevel; 2805 } else { 2806 return 0; 2807 } 2808} 2809 2810U_CAPI int32_t U_EXPORT2 2811ubidi_countParagraphs(UBiDi *pBiDi) { 2812 if(!IS_VALID_PARA_OR_LINE(pBiDi)) { 2813 return 0; 2814 } else { 2815 return pBiDi->paraCount; 2816 } 2817} 2818 2819U_CAPI void U_EXPORT2 2820ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, 2821 int32_t *pParaStart, int32_t *pParaLimit, 2822 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { 2823 int32_t paraStart; 2824 2825 /* check the argument values */ 2826 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 2827 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); 2828 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); 2829 2830 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ 2831 if(paraIndex) { 2832 paraStart=pBiDi->paras[paraIndex-1].limit; 2833 } else { 2834 paraStart=0; 2835 } 2836 if(pParaStart!=NULL) { 2837 *pParaStart=paraStart; 2838 } 2839 if(pParaLimit!=NULL) { 2840 *pParaLimit=pBiDi->paras[paraIndex].limit; 2841 } 2842 if(pParaLevel!=NULL) { 2843 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); 2844 } 2845} 2846 2847U_CAPI int32_t U_EXPORT2 2848ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, 2849 int32_t *pParaStart, int32_t *pParaLimit, 2850 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { 2851 int32_t paraIndex; 2852 2853 /* check the argument values */ 2854 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ 2855 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); 2856 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); 2857 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ 2858 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); 2859 2860 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++); 2861 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); 2862 return paraIndex; 2863} 2864 2865U_CAPI void U_EXPORT2 2866ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, 2867 const void *newContext, UBiDiClassCallback **oldFn, 2868 const void **oldContext, UErrorCode *pErrorCode) 2869{ 2870 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 2871 if(pBiDi==NULL) { 2872 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2873 return; 2874 } 2875 if( oldFn ) 2876 { 2877 *oldFn = pBiDi->fnClassCallback; 2878 } 2879 if( oldContext ) 2880 { 2881 *oldContext = pBiDi->coClassCallback; 2882 } 2883 pBiDi->fnClassCallback = newFn; 2884 pBiDi->coClassCallback = newContext; 2885} 2886 2887U_CAPI void U_EXPORT2 2888ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) 2889{ 2890 if(pBiDi==NULL) { 2891 return; 2892 } 2893 if( fn ) 2894 { 2895 *fn = pBiDi->fnClassCallback; 2896 } 2897 if( context ) 2898 { 2899 *context = pBiDi->coClassCallback; 2900 } 2901} 2902 2903U_CAPI UCharDirection U_EXPORT2 2904ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) 2905{ 2906 UCharDirection dir; 2907 2908 if( pBiDi->fnClassCallback == NULL || 2909 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) 2910 { 2911 dir = ubidi_getClass(pBiDi->bdp, c); 2912 } 2913 if(dir >= U_CHAR_DIRECTION_COUNT) { 2914 dir = ON; 2915 } 2916 return dir; 2917} 2918