1/* 2******************************************************************************** 3* Copyright (C) 1996-2010, International Business Machines 4* Corporation and others. All Rights Reserved. 5******************************************************************************** 6* 7* File UCHAR.C 8* 9* Modification History: 10* 11* Date Name Description 12* 04/02/97 aliu Creation. 13* 4/15/99 Madhu Updated all the function definitions for C Implementation 14* 5/20/99 Madhu Added the function u_getVersion() 15* 8/19/1999 srl Upgraded scripts to Unicode3.0 16* 11/11/1999 weiv added u_isalnum(), cleaned comments 17* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion. 18* 06/20/2000 helena OS/400 port changes; mostly typecast. 19****************************************************************************** 20*/ 21 22#include "unicode/utypes.h" 23#include "unicode/uchar.h" 24#include "unicode/uscript.h" 25#include "unicode/udata.h" 26#include "umutex.h" 27#include "cmemory.h" 28#include "ucln_cmn.h" 29#include "utrie2.h" 30#include "udataswp.h" 31#include "uprops.h" 32#include "ustr_imp.h" 33 34#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 35 36/* uchar_props_data.c is machine-generated by genprops --csource */ 37#include "uchar_props_data.c" 38 39/* constants and macros for access to the data ------------------------------ */ 40 41/* getting a uint32_t properties word from the data */ 42#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); 43 44U_CFUNC UBool 45uprv_haveProperties(UErrorCode *pErrorCode) { 46 if(U_FAILURE(*pErrorCode)) { 47 return FALSE; 48 } 49 return TRUE; 50} 51 52/* API functions ------------------------------------------------------------ */ 53 54/* Gets the Unicode character's general category.*/ 55U_CAPI int8_t U_EXPORT2 56u_charType(UChar32 c) { 57 uint32_t props; 58 GET_PROPS(c, props); 59 return (int8_t)GET_CATEGORY(props); 60} 61 62/* Enumerate all code points with their general categories. */ 63struct _EnumTypeCallback { 64 UCharEnumTypeRange *enumRange; 65 const void *context; 66}; 67 68static uint32_t U_CALLCONV 69_enumTypeValue(const void *context, uint32_t value) { 70 return GET_CATEGORY(value); 71} 72 73static UBool U_CALLCONV 74_enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 75 /* just cast the value to UCharCategory */ 76 return ((struct _EnumTypeCallback *)context)-> 77 enumRange(((struct _EnumTypeCallback *)context)->context, 78 start, end+1, (UCharCategory)value); 79} 80 81U_CAPI void U_EXPORT2 82u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) { 83 struct _EnumTypeCallback callback; 84 85 if(enumRange==NULL) { 86 return; 87 } 88 89 callback.enumRange=enumRange; 90 callback.context=context; 91 utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback); 92} 93 94/* Checks if ch is a lower case letter.*/ 95U_CAPI UBool U_EXPORT2 96u_islower(UChar32 c) { 97 uint32_t props; 98 GET_PROPS(c, props); 99 return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER); 100} 101 102/* Checks if ch is an upper case letter.*/ 103U_CAPI UBool U_EXPORT2 104u_isupper(UChar32 c) { 105 uint32_t props; 106 GET_PROPS(c, props); 107 return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER); 108} 109 110/* Checks if ch is a title case letter; usually upper case letters.*/ 111U_CAPI UBool U_EXPORT2 112u_istitle(UChar32 c) { 113 uint32_t props; 114 GET_PROPS(c, props); 115 return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER); 116} 117 118/* Checks if ch is a decimal digit. */ 119U_CAPI UBool U_EXPORT2 120u_isdigit(UChar32 c) { 121 uint32_t props; 122 GET_PROPS(c, props); 123 return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 124} 125 126U_CAPI UBool U_EXPORT2 127u_isxdigit(UChar32 c) { 128 uint32_t props; 129 130 /* check ASCII and Fullwidth ASCII a-fA-F */ 131 if( 132 (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) || 133 (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) 134 ) { 135 return TRUE; 136 } 137 138 GET_PROPS(c, props); 139 return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 140} 141 142/* Checks if the Unicode character is a letter.*/ 143U_CAPI UBool U_EXPORT2 144u_isalpha(UChar32 c) { 145 uint32_t props; 146 GET_PROPS(c, props); 147 return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0); 148} 149 150U_CAPI UBool U_EXPORT2 151u_isUAlphabetic(UChar32 c) { 152 return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0; 153} 154 155/* Checks if c is a letter or a decimal digit */ 156U_CAPI UBool U_EXPORT2 157u_isalnum(UChar32 c) { 158 uint32_t props; 159 GET_PROPS(c, props); 160 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0); 161} 162 163/** 164 * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM. 165 * @internal 166 */ 167U_CFUNC UBool 168u_isalnumPOSIX(UChar32 c) { 169 return (UBool)(u_isUAlphabetic(c) || u_isdigit(c)); 170} 171 172/* Checks if ch is a unicode character with assigned character type.*/ 173U_CAPI UBool U_EXPORT2 174u_isdefined(UChar32 c) { 175 uint32_t props; 176 GET_PROPS(c, props); 177 return (UBool)(GET_CATEGORY(props)!=0); 178} 179 180/* Checks if the Unicode character is a base form character that can take a diacritic.*/ 181U_CAPI UBool U_EXPORT2 182u_isbase(UChar32 c) { 183 uint32_t props; 184 GET_PROPS(c, props); 185 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0); 186} 187 188/* Checks if the Unicode character is a control character.*/ 189U_CAPI UBool U_EXPORT2 190u_iscntrl(UChar32 c) { 191 uint32_t props; 192 GET_PROPS(c, props); 193 return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0); 194} 195 196U_CAPI UBool U_EXPORT2 197u_isISOControl(UChar32 c) { 198 return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f); 199} 200 201/* Some control characters that are used as space. */ 202#define IS_THAT_CONTROL_SPACE(c) \ 203 (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)) 204 205/* Java has decided that U+0085 New Line is not whitespace any more. */ 206#define IS_THAT_ASCII_CONTROL_SPACE(c) \ 207 (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c)) 208 209/* Checks if the Unicode character is a space character.*/ 210U_CAPI UBool U_EXPORT2 211u_isspace(UChar32 c) { 212 uint32_t props; 213 GET_PROPS(c, props); 214 return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c)); 215} 216 217U_CAPI UBool U_EXPORT2 218u_isJavaSpaceChar(UChar32 c) { 219 uint32_t props; 220 GET_PROPS(c, props); 221 return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0); 222} 223 224/* Checks if the Unicode character is a whitespace character.*/ 225U_CAPI UBool U_EXPORT2 226u_isWhitespace(UChar32 c) { 227 uint32_t props; 228 GET_PROPS(c, props); 229 return (UBool)( 230 ((CAT_MASK(props)&U_GC_Z_MASK)!=0 && 231 c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */ 232 IS_THAT_ASCII_CONTROL_SPACE(c) 233 ); 234} 235 236U_CAPI UBool U_EXPORT2 237u_isblank(UChar32 c) { 238 if((uint32_t)c<=0x9f) { 239 return c==9 || c==0x20; /* TAB or SPACE */ 240 } else { 241 /* Zs */ 242 uint32_t props; 243 GET_PROPS(c, props); 244 return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR); 245 } 246} 247 248U_CAPI UBool U_EXPORT2 249u_isUWhiteSpace(UChar32 c) { 250 return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0; 251} 252 253/* Checks if the Unicode character is printable.*/ 254U_CAPI UBool U_EXPORT2 255u_isprint(UChar32 c) { 256 uint32_t props; 257 GET_PROPS(c, props); 258 /* comparing ==0 returns FALSE for the categories mentioned */ 259 return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0); 260} 261 262/** 263 * Checks if c is in \p{graph}\p{blank} - \p{cntrl}. 264 * Implements UCHAR_POSIX_PRINT. 265 * @internal 266 */ 267U_CFUNC UBool 268u_isprintPOSIX(UChar32 c) { 269 uint32_t props; 270 GET_PROPS(c, props); 271 /* 272 * The only cntrl character in graph+blank is TAB (in blank). 273 * Here we implement (blank-TAB)=Zs instead of calling u_isblank(). 274 */ 275 return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c)); 276} 277 278U_CAPI UBool U_EXPORT2 279u_isgraph(UChar32 c) { 280 uint32_t props; 281 GET_PROPS(c, props); 282 /* comparing ==0 returns FALSE for the categories mentioned */ 283 return (UBool)((CAT_MASK(props)& 284 (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 285 ==0); 286} 287 288/** 289 * Checks if c is in 290 * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] 291 * with space=\p{Whitespace} and Control=Cc. 292 * Implements UCHAR_POSIX_GRAPH. 293 * @internal 294 */ 295U_CFUNC UBool 296u_isgraphPOSIX(UChar32 c) { 297 uint32_t props; 298 GET_PROPS(c, props); 299 /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ 300 /* comparing ==0 returns FALSE for the categories mentioned */ 301 return (UBool)((CAT_MASK(props)& 302 (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 303 ==0); 304} 305 306U_CAPI UBool U_EXPORT2 307u_ispunct(UChar32 c) { 308 uint32_t props; 309 GET_PROPS(c, props); 310 return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0); 311} 312 313/* Checks if the Unicode character can start a Unicode identifier.*/ 314U_CAPI UBool U_EXPORT2 315u_isIDStart(UChar32 c) { 316 /* same as u_isalpha() */ 317 uint32_t props; 318 GET_PROPS(c, props); 319 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0); 320} 321 322/* Checks if the Unicode character can be a Unicode identifier part other than starting the 323 identifier.*/ 324U_CAPI UBool U_EXPORT2 325u_isIDPart(UChar32 c) { 326 uint32_t props; 327 GET_PROPS(c, props); 328 return (UBool)( 329 (CAT_MASK(props)& 330 (U_GC_ND_MASK|U_GC_NL_MASK| 331 U_GC_L_MASK| 332 U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK) 333 )!=0 || 334 u_isIDIgnorable(c)); 335} 336 337/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/ 338U_CAPI UBool U_EXPORT2 339u_isIDIgnorable(UChar32 c) { 340 if(c<=0x9f) { 341 return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c); 342 } else { 343 uint32_t props; 344 GET_PROPS(c, props); 345 return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR); 346 } 347} 348 349/*Checks if the Unicode character can start a Java identifier.*/ 350U_CAPI UBool U_EXPORT2 351u_isJavaIDStart(UChar32 c) { 352 uint32_t props; 353 GET_PROPS(c, props); 354 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0); 355} 356 357/*Checks if the Unicode character can be a Java identifier part other than starting the 358 * identifier. 359 */ 360U_CAPI UBool U_EXPORT2 361u_isJavaIDPart(UChar32 c) { 362 uint32_t props; 363 GET_PROPS(c, props); 364 return (UBool)( 365 (CAT_MASK(props)& 366 (U_GC_ND_MASK|U_GC_NL_MASK| 367 U_GC_L_MASK| 368 U_GC_SC_MASK|U_GC_PC_MASK| 369 U_GC_MC_MASK|U_GC_MN_MASK) 370 )!=0 || 371 u_isIDIgnorable(c)); 372} 373 374U_CAPI int32_t U_EXPORT2 375u_charDigitValue(UChar32 c) { 376 uint32_t props; 377 int32_t value; 378 GET_PROPS(c, props); 379 value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START; 380 if(value<=9) { 381 return value; 382 } else { 383 return -1; 384 } 385} 386 387U_CAPI double U_EXPORT2 388u_getNumericValue(UChar32 c) { 389 uint32_t props; 390 int32_t ntv; 391 GET_PROPS(c, props); 392 ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props); 393 394 if(ntv==UPROPS_NTV_NONE) { 395 return U_NO_NUMERIC_VALUE; 396 } else if(ntv<UPROPS_NTV_DIGIT_START) { 397 /* decimal digit */ 398 return ntv-UPROPS_NTV_DECIMAL_START; 399 } else if(ntv<UPROPS_NTV_NUMERIC_START) { 400 /* other digit */ 401 return ntv-UPROPS_NTV_DIGIT_START; 402 } else if(ntv<UPROPS_NTV_FRACTION_START) { 403 /* small integer */ 404 return ntv-UPROPS_NTV_NUMERIC_START; 405 } else if(ntv<UPROPS_NTV_LARGE_START) { 406 /* fraction */ 407 int32_t numerator=(ntv>>4)-12; 408 int32_t denominator=(ntv&0xf)+1; 409 return (double)numerator/denominator; 410 } else if(ntv<UPROPS_NTV_RESERVED_START) { 411 /* large, single-significant-digit integer */ 412 double numValue; 413 int32_t mant=(ntv>>5)-14; 414 int32_t exp=(ntv&0x1f)+2; 415 numValue=mant; 416 417 /* multiply by 10^exp without math.h */ 418 while(exp>=4) { 419 numValue*=10000.; 420 exp-=4; 421 } 422 switch(exp) { 423 case 3: 424 numValue*=1000.; 425 break; 426 case 2: 427 numValue*=100.; 428 break; 429 case 1: 430 numValue*=10.; 431 break; 432 case 0: 433 default: 434 break; 435 } 436 437 return numValue; 438 } else { 439 /* reserved */ 440 return U_NO_NUMERIC_VALUE; 441 } 442} 443 444U_CAPI int32_t U_EXPORT2 445u_digit(UChar32 ch, int8_t radix) { 446 int8_t value; 447 if((uint8_t)(radix-2)<=(36-2)) { 448 value=(int8_t)u_charDigitValue(ch); 449 if(value<0) { 450 /* ch is not a decimal digit, try latin letters */ 451 if(ch>=0x61 && ch<=0x7A) { 452 value=(int8_t)(ch-0x57); /* ch - 'a' + 10 */ 453 } else if(ch>=0x41 && ch<=0x5A) { 454 value=(int8_t)(ch-0x37); /* ch - 'A' + 10 */ 455 } else if(ch>=0xFF41 && ch<=0xFF5A) { 456 value=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */ 457 } else if(ch>=0xFF21 && ch<=0xFF3A) { 458 value=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */ 459 } 460 } 461 } else { 462 value=-1; /* invalid radix */ 463 } 464 return (int8_t)((value<radix) ? value : -1); 465} 466 467U_CAPI UChar32 U_EXPORT2 468u_forDigit(int32_t digit, int8_t radix) { 469 if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) { 470 return 0; 471 } else if(digit<10) { 472 return (UChar32)(0x30+digit); 473 } else { 474 return (UChar32)((0x61-10)+digit); 475 } 476} 477 478/* miscellaneous, and support for uprops.c ---------------------------------- */ 479 480U_CAPI void U_EXPORT2 481u_getUnicodeVersion(UVersionInfo versionArray) { 482 if(versionArray!=NULL) { 483 uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH); 484 } 485} 486 487U_CFUNC uint32_t 488u_getUnicodeProperties(UChar32 c, int32_t column) { 489 uint16_t vecIndex; 490 491 if(column==-1) { 492 uint32_t props; 493 GET_PROPS(c, props); 494 return props; 495 } else if( 496 column<0 || column>=propsVectorsColumns 497 ) { 498 return 0; 499 } else { 500 vecIndex=UTRIE2_GET16(&propsVectorsTrie, c); 501 return propsVectors[vecIndex+column]; 502 } 503} 504 505U_CFUNC int32_t 506uprv_getMaxValues(int32_t column) { 507 switch(column) { 508 case 0: 509 return indexes[UPROPS_MAX_VALUES_INDEX]; 510 case 2: 511 return indexes[UPROPS_MAX_VALUES_2_INDEX]; 512 default: 513 return 0; 514 } 515} 516 517U_CAPI void U_EXPORT2 518u_charAge(UChar32 c, UVersionInfo versionArray) { 519 if(versionArray!=NULL) { 520 uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT; 521 versionArray[0]=(uint8_t)(version>>4); 522 versionArray[1]=(uint8_t)(version&0xf); 523 versionArray[2]=versionArray[3]=0; 524 } 525} 526 527U_CAPI UScriptCode U_EXPORT2 528uscript_getScript(UChar32 c, UErrorCode *pErrorCode) { 529 uint32_t scriptX; 530 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 531 return USCRIPT_INVALID_CODE; 532 } 533 if((uint32_t)c>0x10ffff) { 534 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 535 return USCRIPT_INVALID_CODE; 536 } 537 scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 538 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 539 return (UScriptCode)scriptX; 540 } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) { 541 return USCRIPT_COMMON; 542 } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) { 543 return USCRIPT_INHERITED; 544 } else { 545 return (UScriptCode)scriptExtensions[scriptX&UPROPS_SCRIPT_MASK]; 546 } 547} 548 549U_DRAFT UBool U_EXPORT2 550uscript_hasScript(UChar32 c, UScriptCode sc) { 551 UScriptCode script; 552 const uint16_t *scx; 553 uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 554 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 555 return sc==(UScriptCode)scriptX; 556 } 557 558 scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 559 if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) { 560 script=USCRIPT_COMMON; 561 } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) { 562 script=USCRIPT_INHERITED; 563 } else { 564 script=(UScriptCode)scx[0]; 565 scx=scriptExtensions+scx[1]; 566 } 567 if(sc==script) { 568 return TRUE; 569 } 570 while(sc>*scx) { 571 ++scx; 572 } 573 return sc==(*scx&0x7fff); 574} 575 576U_DRAFT int32_t U_EXPORT2 577uscript_getScriptExtensions(UChar32 c, 578 UScriptCode *scripts, int32_t capacity, 579 UErrorCode *pErrorCode) { 580 uint32_t scriptX; 581 int32_t length; 582 const uint16_t *scx; 583 uint16_t sx; 584 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 585 return 0; 586 } 587 if(capacity<0 || (capacity>0 && scripts==NULL)) { 588 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 589 return 0; 590 } 591 scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 592 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 593 return 0; 594 } 595 596 length=0; 597 scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 598 if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { 599 scx=scriptExtensions+scx[1]; 600 } 601 do { 602 sx=*scx++; 603 if(length<capacity) { 604 scripts[length]=sx&0x7fff; 605 } 606 ++length; 607 } while(sx<0x8000); 608 if(length>capacity) { 609 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 610 } 611 return length; 612} 613 614U_CAPI UBlockCode U_EXPORT2 615ublock_getCode(UChar32 c) { 616 return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT); 617} 618 619/* property starts for UnicodeSet ------------------------------------------- */ 620 621static UBool U_CALLCONV 622_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 623 /* add the start code point to the USet */ 624 const USetAdder *sa=(const USetAdder *)context; 625 sa->add(sa->set, start); 626 return TRUE; 627} 628 629#define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1) 630 631U_CFUNC void U_EXPORT2 632uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 633 if(U_FAILURE(*pErrorCode)) { 634 return; 635 } 636 637 /* add the start code point of each same-value range of the main trie */ 638 utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa); 639 640 /* add code points with hardcoded properties, plus the ones following them */ 641 642 /* add for u_isblank() */ 643 USET_ADD_CP_AND_NEXT(sa, TAB); 644 645 /* add for IS_THAT_CONTROL_SPACE() */ 646 sa->add(sa->set, CR+1); /* range TAB..CR */ 647 sa->add(sa->set, 0x1c); 648 sa->add(sa->set, 0x1f+1); 649 USET_ADD_CP_AND_NEXT(sa, NL); 650 651 /* add for u_isIDIgnorable() what was not added above */ 652 sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ 653 sa->add(sa->set, HAIRSP); 654 sa->add(sa->set, RLM+1); 655 sa->add(sa->set, INHSWAP); 656 sa->add(sa->set, NOMDIG+1); 657 USET_ADD_CP_AND_NEXT(sa, ZWNBSP); 658 659 /* add no-break spaces for u_isWhitespace() what was not added above */ 660 USET_ADD_CP_AND_NEXT(sa, NBSP); 661 USET_ADD_CP_AND_NEXT(sa, FIGURESP); 662 USET_ADD_CP_AND_NEXT(sa, NNBSP); 663 664 /* add for u_digit() */ 665 sa->add(sa->set, U_a); 666 sa->add(sa->set, U_z+1); 667 sa->add(sa->set, U_A); 668 sa->add(sa->set, U_Z+1); 669 sa->add(sa->set, U_FW_a); 670 sa->add(sa->set, U_FW_z+1); 671 sa->add(sa->set, U_FW_A); 672 sa->add(sa->set, U_FW_Z+1); 673 674 /* add for u_isxdigit() */ 675 sa->add(sa->set, U_f+1); 676 sa->add(sa->set, U_F+1); 677 sa->add(sa->set, U_FW_f+1); 678 sa->add(sa->set, U_FW_F+1); 679 680 /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ 681 sa->add(sa->set, WJ); /* range WJ..NOMDIG */ 682 sa->add(sa->set, 0xfff0); 683 sa->add(sa->set, 0xfffb+1); 684 sa->add(sa->set, 0xe0000); 685 sa->add(sa->set, 0xe0fff+1); 686 687 /* add for UCHAR_GRAPHEME_BASE and others */ 688 USET_ADD_CP_AND_NEXT(sa, CGJ); 689} 690 691U_CFUNC void U_EXPORT2 692upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 693 if(U_FAILURE(*pErrorCode)) { 694 return; 695 } 696 697 /* add the start code point of each same-value range of the properties vectors trie */ 698 if(propsVectorsColumns>0) { 699 /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */ 700 utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa); 701 } 702} 703