1/* 2****************************************************************************** 3* 4* Copyright (C) 1999-2013, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7****************************************************************************** 8* file name: ubidiimp.h 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 1999aug06 14* created by: Markus W. Scherer, updated by Matitiahu Allouche 15*/ 16 17#ifndef UBIDIIMP_H 18#define UBIDIIMP_H 19 20/* set import/export definitions */ 21#ifdef U_COMMON_IMPLEMENTATION 22 23#include "unicode/utypes.h" 24#include "unicode/uchar.h" 25#include "ubidi_props.h" 26 27/* miscellaneous definitions ---------------------------------------------- */ 28 29typedef uint8_t DirProp; 30typedef uint32_t Flags; 31 32/* Comparing the description of the BiDi algorithm with this implementation 33 is easier with the same names for the BiDi types in the code as there. 34 See UCharDirection in uchar.h . 35*/ 36enum { 37 L= U_LEFT_TO_RIGHT, /* 0 */ 38 R= U_RIGHT_TO_LEFT, /* 1 */ 39 EN= U_EUROPEAN_NUMBER, /* 2 */ 40 ES= U_EUROPEAN_NUMBER_SEPARATOR, /* 3 */ 41 ET= U_EUROPEAN_NUMBER_TERMINATOR, /* 4 */ 42 AN= U_ARABIC_NUMBER, /* 5 */ 43 CS= U_COMMON_NUMBER_SEPARATOR, /* 6 */ 44 B= U_BLOCK_SEPARATOR, /* 7 */ 45 S= U_SEGMENT_SEPARATOR, /* 8 */ 46 WS= U_WHITE_SPACE_NEUTRAL, /* 9 */ 47 ON= U_OTHER_NEUTRAL, /* 10 */ 48 LRE=U_LEFT_TO_RIGHT_EMBEDDING, /* 11 */ 49 LRO=U_LEFT_TO_RIGHT_OVERRIDE, /* 12 */ 50 AL= U_RIGHT_TO_LEFT_ARABIC, /* 13 */ 51 RLE=U_RIGHT_TO_LEFT_EMBEDDING, /* 14 */ 52 RLO=U_RIGHT_TO_LEFT_OVERRIDE, /* 15 */ 53 PDF=U_POP_DIRECTIONAL_FORMAT, /* 16 */ 54 NSM=U_DIR_NON_SPACING_MARK, /* 17 */ 55 BN= U_BOUNDARY_NEUTRAL, /* 18 */ 56 FSI=U_FIRST_STRONG_ISOLATE, /* 19 */ 57 LRI=U_LEFT_TO_RIGHT_ISOLATE, /* 20 */ 58 RLI=U_RIGHT_TO_LEFT_ISOLATE, /* 21 */ 59 PDI=U_POP_DIRECTIONAL_ISOLATE, /* 22 */ 60 ENL, /* 23 */ 61 ENR, /* 24 */ 62 dirPropCount 63}; 64 65/* 66 * Sometimes, bit values are more appropriate 67 * to deal with directionality properties. 68 * Abbreviations in these macro names refer to names 69 * used in the BiDi algorithm. 70 */ 71#define DIRPROP_FLAG(dir) (1UL<<(dir)) 72 73/* special flag for multiple runs from explicit embedding codes */ 74#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31) 75 76/* are there any characters that are LTR or RTL? */ 77#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI)) 78#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI)) 79#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)) 80#define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)) 81 82/* explicit embedding codes */ 83#define MASK_EXPLICIT (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(PDF)) 84 85/* explicit isolate codes */ 86#define MASK_ISO (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI)) 87 88#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT) 89 90/* paragraph and segment separators */ 91#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S)) 92 93/* all types that are counted as White Space or Neutral in some steps */ 94#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO) 95 96/* types that are neutrals or could becomes neutrals in (Wn) */ 97#define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS) 98 99/* 100 * These types may be changed to "e", 101 * the embedding type (L or R) of the run, 102 * in the BiDi algorithm (N2) 103 */ 104#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N) 105 106/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */ 107#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1)) 108 109#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe) 110 111/* 112 * The following bit is ORed to the property of directional control 113 * characters which are ignored: unmatched PDF or PDI; LRx, RLx or FSI 114 * which would exceed the maximum explicit bidi level. 115 */ 116#define IGNORE_CC 0x40 117 118#define PURE_DIRPROP(prop) ((prop)&~IGNORE_CC) 119 120/* 121 * The following bit is used for the directional isolate status. 122 * Stack entries corresponding to isolate sequences are greater than ISOLATE. 123 */ 124#define ISOLATE 0x0100 125 126U_CFUNC UBiDiLevel 127ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t index); 128 129#define GET_PARALEVEL(ubidi, index) \ 130 ((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? \ 131 (ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index)))) 132 133/* number of paras entries allocated initially without malloc */ 134#define SIMPLE_PARAS_SIZE 10 135/* number of isolate entries allocated initially without malloc */ 136#define SIMPLE_ISOLATES_SIZE 5 137/* number of isolate run entries for paired brackets allocated initially without malloc */ 138#define SIMPLE_OPENINGS_SIZE 20 139 140#define CR 0x000D 141#define LF 0x000A 142 143/* Run structure for reordering --------------------------------------------- */ 144enum { 145 LRM_BEFORE=1, 146 LRM_AFTER=2, 147 RLM_BEFORE=4, 148 RLM_AFTER=8 149}; 150 151typedef struct Para { 152 int32_t limit; 153 int32_t level; 154} Para; 155 156enum { /* flags for Opening.flags */ 157 FOUND_L=DIRPROP_FLAG(L), 158 FOUND_R=DIRPROP_FLAG(R) 159}; 160 161typedef struct Opening { 162 int32_t position; /* position of opening bracket */ 163 int32_t match; /* matching char or -position of closing bracket */ 164 int32_t contextPos; /* position of last strong char found before opening */ 165 uint16_t flags; /* bits for L or R/AL found within the pair */ 166 UBiDiDirection contextDir; /* L or R according to last strong char before opening */ 167 uint8_t filler; /* to complete a nice multiple of 4 chars */ 168} Opening; 169 170typedef struct IsoRun { 171 int32_t lastStrongPos; /* position of last strong char found in this run */ 172 int32_t contextPos; /* position of last char defining context */ 173 uint16_t start; /* index of first opening entry for this run */ 174 uint16_t limit; /* index after last opening entry for this run */ 175 UBiDiLevel level; /* level of this run */ 176 DirProp lastStrong; /* bidi class of last strong char found in this run */ 177 UBiDiDirection contextDir; /* L or R to use as context for following openings */ 178 uint8_t filler; /* to complete a nice multiple of 4 chars */ 179} IsoRun; 180 181typedef struct BracketData { 182 UBiDi *pBiDi; 183 /* array of opening entries which should be enough in most cases; no malloc() */ 184 Opening simpleOpenings[SIMPLE_OPENINGS_SIZE]; 185 Opening *openings; /* pointer to current array of entries */ 186 int32_t openingsCount; /* number of allocated entries */ 187 int32_t isoRunLast; /* index of last used entry */ 188 /* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL 189 + 1 for index 0, + 1 for before the first isolated sequence */ 190 IsoRun isoRuns[UBIDI_MAX_EXPLICIT_LEVEL+2]; 191 UBool isNumbersSpecial; /* reordering mode for NUMBERS_SPECIAL */ 192} BracketData; 193 194typedef struct Isolate { 195 int32_t start1; 196 int16_t stateImp; 197 int16_t state; 198} Isolate; 199 200typedef struct Run { 201 int32_t logicalStart, /* first character of the run; b31 indicates even/odd level */ 202 visualLimit, /* last visual position of the run +1 */ 203 insertRemove; /* if >0, flags for inserting LRM/RLM before/after run, 204 if <0, count of bidi controls within run */ 205} Run; 206 207/* in a Run, logicalStart will get this bit set if the run level is odd */ 208#define INDEX_ODD_BIT (1UL<<31) 209 210#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31)) 211#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)(level)<<31)) 212#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT) 213 214#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT) 215#define GET_ODD_BIT(x) ((uint32_t)(x)>>31) 216#define IS_ODD_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)!=0)) 217#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0)) 218 219U_CFUNC UBool 220ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); 221 222/** BiDi control code points */ 223enum { 224 ZWNJ_CHAR=0x200c, 225 ZWJ_CHAR, 226 LRM_CHAR, 227 RLM_CHAR, 228 LRE_CHAR=0x202a, 229 RLE_CHAR, 230 PDF_CHAR, 231 LRO_CHAR, 232 RLO_CHAR, 233 LRI_CHAR=0x2066, 234 RLI_CHAR, 235 FSI_CHAR, 236 PDI_CHAR 237}; 238 239#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5 || (uint32_t)((c)-LRI_CHAR)<4) 240 241/* InsertPoints structure for noting where to put BiDi marks ---------------- */ 242 243typedef struct Point { 244 int32_t pos; /* position in text */ 245 int32_t flag; /* flag for LRM/RLM, before/after */ 246} Point; 247 248typedef struct InsertPoints { 249 int32_t capacity; /* number of points allocated */ 250 int32_t size; /* number of points used */ 251 int32_t confirmed; /* number of points confirmed */ 252 UErrorCode errorCode; /* for eventual memory shortage */ 253 Point *points; /* pointer to array of points */ 254} InsertPoints; 255 256 257/* UBiDi structure ----------------------------------------------------------- */ 258 259struct UBiDi { 260 /* pointer to parent paragraph object (pointer to self if this object is 261 * a paragraph object); set to NULL in a newly opened object; set to a 262 * real value after a successful execution of ubidi_setPara or ubidi_setLine 263 */ 264 const UBiDi * pParaBiDi; 265 266 const UBiDiProps *bdp; 267 268 /* alias pointer to the current text */ 269 const UChar *text; 270 271 /* length of the current text */ 272 int32_t originalLength; 273 274 /* if the UBIDI_OPTION_STREAMING option is set, this is the length 275 * of text actually processed by ubidi_setPara, which may be shorter than 276 * the original length. 277 * Otherwise, it is identical to the original length. 278 */ 279 int32_t length; 280 281 /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or 282 * marks are allowed to be inserted in one of the reordering mode, the 283 * length of the result string may be different from the processed length. 284 */ 285 int32_t resultLength; 286 287 /* memory sizes in bytes */ 288 int32_t dirPropsSize, levelsSize, openingsSize, parasSize, runsSize, isolatesSize; 289 290 /* allocated memory */ 291 DirProp *dirPropsMemory; 292 UBiDiLevel *levelsMemory; 293 Opening *openingsMemory; 294 Para *parasMemory; 295 Run *runsMemory; 296 Isolate *isolatesMemory; 297 298 /* indicators for whether memory may be allocated after ubidi_open() */ 299 UBool mayAllocateText, mayAllocateRuns; 300 301 /* arrays with one value per text-character */ 302 DirProp *dirProps; 303 UBiDiLevel *levels; 304 305 /* are we performing an approximation of the "inverse BiDi" algorithm? */ 306 UBool isInverse; 307 308 /* are we using the basic algorithm or its variation? */ 309 UBiDiReorderingMode reorderingMode; 310 311 /* UBIDI_REORDER_xxx values must be ordered so that all the regular 312 * logical to visual modes come first, and all inverse BiDi modes 313 * come last. 314 */ 315 #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL 316 317 /* bitmask for reordering options */ 318 uint32_t reorderingOptions; 319 320 /* must block separators receive level 0? */ 321 UBool orderParagraphsLTR; 322 323 /* the paragraph level */ 324 UBiDiLevel paraLevel; 325 /* original paraLevel when contextual */ 326 /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */ 327 UBiDiLevel defaultParaLevel; 328 329 /* context data */ 330 const UChar *prologue; 331 int32_t proLength; 332 const UChar *epilogue; 333 int32_t epiLength; 334 335 /* the following is set in ubidi_setPara, used in processPropertySeq */ 336 const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */ 337 338 /* the overall paragraph or line directionality - see UBiDiDirection */ 339 UBiDiDirection direction; 340 341 /* flags is a bit set for which directional properties are in the text */ 342 Flags flags; 343 344 /* lastArabicPos is index to the last AL in the text, -1 if none */ 345 int32_t lastArabicPos; 346 347 /* characters after trailingWSStart are WS and are */ 348 /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ 349 int32_t trailingWSStart; 350 351 /* fields for paragraph handling */ 352 int32_t paraCount; /* set in getDirProps() */ 353 /* filled in getDirProps() */ 354 Para *paras; 355 356 /* for relatively short text, we only need a tiny array of paras (no malloc()) */ 357 Para simpleParas[SIMPLE_PARAS_SIZE]; 358 359 /* fields for line reordering */ 360 int32_t runCount; /* ==-1: runs not set up yet */ 361 Run *runs; 362 363 /* for non-mixed text, we only need a tiny array of runs (no malloc()) */ 364 Run simpleRuns[1]; 365 366 /* maximum or current nesting depth of isolate sequences */ 367 /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal 368 nesting encountered. 369 Within resolveImplicitLevels(), this is the index of the current isolates 370 stack entry. */ 371 int32_t isolateCount; 372 Isolate *isolates; 373 374 /* for simple text, have a small stack (no malloc()) */ 375 Isolate simpleIsolates[SIMPLE_ISOLATES_SIZE]; 376 377 /* for inverse Bidi with insertion of directional marks */ 378 InsertPoints insertPoints; 379 380 /* for option UBIDI_OPTION_REMOVE_CONTROLS */ 381 int32_t controlCount; 382 383 /* for Bidi class callback */ 384 UBiDiClassCallback *fnClassCallback; /* action pointer */ 385 const void *coClassCallback; /* context pointer */ 386}; 387 388#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x))) 389#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi))) 390 391typedef union { 392 DirProp *dirPropsMemory; 393 UBiDiLevel *levelsMemory; 394 Opening *openingsMemory; 395 Para *parasMemory; 396 Run *runsMemory; 397 Isolate *isolatesMemory; 398} BidiMemoryForAllocation; 399 400/* Macros for initial checks at function entry */ 401#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) \ 402 if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue 403#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) \ 404 if(!IS_VALID_PARA(bidi)) { \ 405 errcode=U_INVALID_STATE_ERROR; \ 406 return retvalue; \ 407 } 408#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) \ 409 if(!IS_VALID_PARA_OR_LINE(bidi)) { \ 410 errcode=U_INVALID_STATE_ERROR; \ 411 return retvalue; \ 412 } 413#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) \ 414 if((arg)<(start) || (arg)>=(limit)) { \ 415 (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \ 416 return retvalue; \ 417 } 418 419#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode) \ 420 if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return 421#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode) \ 422 if(!IS_VALID_PARA(bidi)) { \ 423 errcode=U_INVALID_STATE_ERROR; \ 424 return; \ 425 } 426#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode) \ 427 if(!IS_VALID_PARA_OR_LINE(bidi)) { \ 428 errcode=U_INVALID_STATE_ERROR; \ 429 return; \ 430 } 431#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode) \ 432 if((arg)<(start) || (arg)>=(limit)) { \ 433 (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \ 434 return; \ 435 } 436 437/* helper function to (re)allocate memory if allowed */ 438U_CFUNC UBool 439ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded); 440 441/* helper macros for each allocated array in UBiDi */ 442#define getDirPropsMemory(pBiDi, length) \ 443 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \ 444 (pBiDi)->mayAllocateText, (length)) 445 446#define getLevelsMemory(pBiDi, length) \ 447 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \ 448 (pBiDi)->mayAllocateText, (length)) 449 450#define getRunsMemory(pBiDi, length) \ 451 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \ 452 (pBiDi)->mayAllocateRuns, (length)*sizeof(Run)) 453 454/* additional macros used by ubidi_open() - always allow allocation */ 455#define getInitialDirPropsMemory(pBiDi, length) \ 456 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \ 457 TRUE, (length)) 458 459#define getInitialLevelsMemory(pBiDi, length) \ 460 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \ 461 TRUE, (length)) 462 463#define getInitialOpeningsMemory(pBiDi, length) \ 464 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \ 465 TRUE, (length)*sizeof(Opening)) 466 467#define getInitialParasMemory(pBiDi, length) \ 468 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \ 469 TRUE, (length)*sizeof(Para)) 470 471#define getInitialRunsMemory(pBiDi, length) \ 472 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \ 473 TRUE, (length)*sizeof(Run)) 474 475#define getInitialIsolatesMemory(pBiDi, length) \ 476 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \ 477 TRUE, (length)*sizeof(Isolate)) 478 479#endif 480 481#endif 482