1/*
2********************************************************************************
3*   Copyright (C) 1996-2012, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5********************************************************************************
6*
7* File UCHAR.C
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   04/02/97    aliu        Creation.
13*   4/15/99     Madhu       Updated all the function definitions for C Implementation
14*   5/20/99     Madhu       Added the function u_getVersion()
15*   8/19/1999   srl         Upgraded scripts to Unicode3.0
16*   11/11/1999  weiv        added u_isalnum(), cleaned comments
17*   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion.
18*   06/20/2000  helena      OS/400 port changes; mostly typecast.
19******************************************************************************
20*/
21
22#include "unicode/utypes.h"
23#include "unicode/uchar.h"
24#include "unicode/uscript.h"
25#include "unicode/udata.h"
26#include "uassert.h"
27#include "cmemory.h"
28#include "ucln_cmn.h"
29#include "utrie2.h"
30#include "udataswp.h"
31#include "uprops.h"
32#include "ustr_imp.h"
33
/* Number of elements of a true array (not a pointer), as an int32_t. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
35
36/* uchar_props_data.h is machine-generated by genprops --csource */
37#define INCLUDED_FROM_UCHAR_C
38#include "uchar_props_data.h"
39
40/* constants and macros for access to the data ------------------------------ */
41
/*
 * Gets the properties word for code point c from propsTrie and stores it in result.
 * Expands to a single expression without a trailing semicolon so that
 * "GET_PROPS(c, props);" is exactly one statement, also inside an unbraced if/else.
 */
#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c))
44
45U_CFUNC UBool
46uprv_haveProperties(UErrorCode *pErrorCode) {
47    if(U_FAILURE(*pErrorCode)) {
48        return FALSE;
49    }
50    return TRUE;
51}
52
53/* API functions ------------------------------------------------------------ */
54
55/* Gets the Unicode character's general category.*/
56U_CAPI int8_t U_EXPORT2
57u_charType(UChar32 c) {
58    uint32_t props;
59    GET_PROPS(c, props);
60    return (int8_t)GET_CATEGORY(props);
61}
62
63/* Enumerate all code points with their general categories. */
64struct _EnumTypeCallback {
65    UCharEnumTypeRange *enumRange;
66    const void *context;
67};
68
69static uint32_t U_CALLCONV
70_enumTypeValue(const void *context, uint32_t value) {
71    return GET_CATEGORY(value);
72}
73
74static UBool U_CALLCONV
75_enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
76    /* just cast the value to UCharCategory */
77    return ((struct _EnumTypeCallback *)context)->
78        enumRange(((struct _EnumTypeCallback *)context)->context,
79                  start, end+1, (UCharCategory)value);
80}
81
82U_CAPI void U_EXPORT2
83u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) {
84    struct _EnumTypeCallback callback;
85
86    if(enumRange==NULL) {
87        return;
88    }
89
90    callback.enumRange=enumRange;
91    callback.context=context;
92    utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback);
93}
94
95/* Checks if ch is a lower case letter.*/
96U_CAPI UBool U_EXPORT2
97u_islower(UChar32 c) {
98    uint32_t props;
99    GET_PROPS(c, props);
100    return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER);
101}
102
103/* Checks if ch is an upper case letter.*/
104U_CAPI UBool U_EXPORT2
105u_isupper(UChar32 c) {
106    uint32_t props;
107    GET_PROPS(c, props);
108    return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER);
109}
110
111/* Checks if ch is a title case letter; usually upper case letters.*/
112U_CAPI UBool U_EXPORT2
113u_istitle(UChar32 c) {
114    uint32_t props;
115    GET_PROPS(c, props);
116    return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER);
117}
118
119/* Checks if ch is a decimal digit. */
120U_CAPI UBool U_EXPORT2
121u_isdigit(UChar32 c) {
122    uint32_t props;
123    GET_PROPS(c, props);
124    return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
125}
126
127U_CAPI UBool U_EXPORT2
128u_isxdigit(UChar32 c) {
129    uint32_t props;
130
131    /* check ASCII and Fullwidth ASCII a-fA-F */
132    if(
133        (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) ||
134        (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41))
135    ) {
136        return TRUE;
137    }
138
139    GET_PROPS(c, props);
140    return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
141}
142
143/* Checks if the Unicode character is a letter.*/
144U_CAPI UBool U_EXPORT2
145u_isalpha(UChar32 c) {
146    uint32_t props;
147    GET_PROPS(c, props);
148    return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0);
149}
150
151U_CAPI UBool U_EXPORT2
152u_isUAlphabetic(UChar32 c) {
153    return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0;
154}
155
156/* Checks if c is a letter or a decimal digit */
157U_CAPI UBool U_EXPORT2
158u_isalnum(UChar32 c) {
159    uint32_t props;
160    GET_PROPS(c, props);
161    return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0);
162}
163
164/**
165 * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
166 * @internal
167 */
168U_CFUNC UBool
169u_isalnumPOSIX(UChar32 c) {
170    return (UBool)(u_isUAlphabetic(c) || u_isdigit(c));
171}
172
173/* Checks if ch is a unicode character with assigned character type.*/
174U_CAPI UBool U_EXPORT2
175u_isdefined(UChar32 c) {
176    uint32_t props;
177    GET_PROPS(c, props);
178    return (UBool)(GET_CATEGORY(props)!=0);
179}
180
181/* Checks if the Unicode character is a base form character that can take a diacritic.*/
182U_CAPI UBool U_EXPORT2
183u_isbase(UChar32 c) {
184    uint32_t props;
185    GET_PROPS(c, props);
186    return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0);
187}
188
189/* Checks if the Unicode character is a control character.*/
190U_CAPI UBool U_EXPORT2
191u_iscntrl(UChar32 c) {
192    uint32_t props;
193    GET_PROPS(c, props);
194    return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0);
195}
196
197U_CAPI UBool U_EXPORT2
198u_isISOControl(UChar32 c) {
199    return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f);
200}
201
/*
 * Some control characters that are used as space:
 * TAB..CR, 0x1c..0x1f, and NL.
 * The argument is parenthesized so that any expression can be passed;
 * it is evaluated more than once, so it must not have side effects.
 */
#define IS_THAT_CONTROL_SPACE(c) \
    ((c)<=0x9f && (((c)>=TAB && (c)<=CR) || ((c)>=0x1c && (c)<=0x1f) || (c)==NL))
205
/*
 * Java has decided that U+0085 New Line is not whitespace any more:
 * this variant accepts only TAB..CR and 0x1c..0x1f.
 * The argument is parenthesized so that any expression can be passed;
 * it is evaluated more than once, so it must not have side effects.
 */
#define IS_THAT_ASCII_CONTROL_SPACE(c) \
    ((c)<=0x1f && (c)>=TAB && ((c)<=CR || (c)>=0x1c))
209
210/* Checks if the Unicode character is a space character.*/
211U_CAPI UBool U_EXPORT2
212u_isspace(UChar32 c) {
213    uint32_t props;
214    GET_PROPS(c, props);
215    return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c));
216}
217
218U_CAPI UBool U_EXPORT2
219u_isJavaSpaceChar(UChar32 c) {
220    uint32_t props;
221    GET_PROPS(c, props);
222    return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0);
223}
224
225/* Checks if the Unicode character is a whitespace character.*/
226U_CAPI UBool U_EXPORT2
227u_isWhitespace(UChar32 c) {
228    uint32_t props;
229    GET_PROPS(c, props);
230    return (UBool)(
231                ((CAT_MASK(props)&U_GC_Z_MASK)!=0 &&
232                    c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */
233                IS_THAT_ASCII_CONTROL_SPACE(c)
234           );
235}
236
237U_CAPI UBool U_EXPORT2
238u_isblank(UChar32 c) {
239    if((uint32_t)c<=0x9f) {
240        return c==9 || c==0x20; /* TAB or SPACE */
241    } else {
242        /* Zs */
243        uint32_t props;
244        GET_PROPS(c, props);
245        return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR);
246    }
247}
248
249U_CAPI UBool U_EXPORT2
250u_isUWhiteSpace(UChar32 c) {
251    return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0;
252}
253
254/* Checks if the Unicode character is printable.*/
255U_CAPI UBool U_EXPORT2
256u_isprint(UChar32 c) {
257    uint32_t props;
258    GET_PROPS(c, props);
259    /* comparing ==0 returns FALSE for the categories mentioned */
260    return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0);
261}
262
263/**
264 * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
265 * Implements UCHAR_POSIX_PRINT.
266 * @internal
267 */
268U_CFUNC UBool
269u_isprintPOSIX(UChar32 c) {
270    uint32_t props;
271    GET_PROPS(c, props);
272    /*
273     * The only cntrl character in graph+blank is TAB (in blank).
274     * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
275     */
276    return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c));
277}
278
279U_CAPI UBool U_EXPORT2
280u_isgraph(UChar32 c) {
281    uint32_t props;
282    GET_PROPS(c, props);
283    /* comparing ==0 returns FALSE for the categories mentioned */
284    return (UBool)((CAT_MASK(props)&
285                    (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
286                   ==0);
287}
288
289/**
290 * Checks if c is in
291 * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
292 * with space=\p{Whitespace} and Control=Cc.
293 * Implements UCHAR_POSIX_GRAPH.
294 * @internal
295 */
296U_CFUNC UBool
297u_isgraphPOSIX(UChar32 c) {
298    uint32_t props;
299    GET_PROPS(c, props);
300    /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
301    /* comparing ==0 returns FALSE for the categories mentioned */
302    return (UBool)((CAT_MASK(props)&
303                    (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
304                   ==0);
305}
306
307U_CAPI UBool U_EXPORT2
308u_ispunct(UChar32 c) {
309    uint32_t props;
310    GET_PROPS(c, props);
311    return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0);
312}
313
314/* Checks if the Unicode character can start a Unicode identifier.*/
315U_CAPI UBool U_EXPORT2
316u_isIDStart(UChar32 c) {
317    /* same as u_isalpha() */
318    uint32_t props;
319    GET_PROPS(c, props);
320    return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0);
321}
322
323/* Checks if the Unicode character can be a Unicode identifier part other than starting the
324 identifier.*/
325U_CAPI UBool U_EXPORT2
326u_isIDPart(UChar32 c) {
327    uint32_t props;
328    GET_PROPS(c, props);
329    return (UBool)(
330           (CAT_MASK(props)&
331            (U_GC_ND_MASK|U_GC_NL_MASK|
332             U_GC_L_MASK|
333             U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK)
334           )!=0 ||
335           u_isIDIgnorable(c));
336}
337
338/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
339U_CAPI UBool U_EXPORT2
340u_isIDIgnorable(UChar32 c) {
341    if(c<=0x9f) {
342        return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c);
343    } else {
344        uint32_t props;
345        GET_PROPS(c, props);
346        return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR);
347    }
348}
349
350/*Checks if the Unicode character can start a Java identifier.*/
351U_CAPI UBool U_EXPORT2
352u_isJavaIDStart(UChar32 c) {
353    uint32_t props;
354    GET_PROPS(c, props);
355    return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0);
356}
357
358/*Checks if the Unicode character can be a Java identifier part other than starting the
359 * identifier.
360 */
361U_CAPI UBool U_EXPORT2
362u_isJavaIDPart(UChar32 c) {
363    uint32_t props;
364    GET_PROPS(c, props);
365    return (UBool)(
366           (CAT_MASK(props)&
367            (U_GC_ND_MASK|U_GC_NL_MASK|
368             U_GC_L_MASK|
369             U_GC_SC_MASK|U_GC_PC_MASK|
370             U_GC_MC_MASK|U_GC_MN_MASK)
371           )!=0 ||
372           u_isIDIgnorable(c));
373}
374
375U_CAPI int32_t U_EXPORT2
376u_charDigitValue(UChar32 c) {
377    uint32_t props;
378    int32_t value;
379    GET_PROPS(c, props);
380    value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START;
381    if(value<=9) {
382        return value;
383    } else {
384        return -1;
385    }
386}
387
388U_CAPI double U_EXPORT2
389u_getNumericValue(UChar32 c) {
390    uint32_t props;
391    int32_t ntv;
392    GET_PROPS(c, props);
393    ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props);
394
395    if(ntv==UPROPS_NTV_NONE) {
396        return U_NO_NUMERIC_VALUE;
397    } else if(ntv<UPROPS_NTV_DIGIT_START) {
398        /* decimal digit */
399        return ntv-UPROPS_NTV_DECIMAL_START;
400    } else if(ntv<UPROPS_NTV_NUMERIC_START) {
401        /* other digit */
402        return ntv-UPROPS_NTV_DIGIT_START;
403    } else if(ntv<UPROPS_NTV_FRACTION_START) {
404        /* small integer */
405        return ntv-UPROPS_NTV_NUMERIC_START;
406    } else if(ntv<UPROPS_NTV_LARGE_START) {
407        /* fraction */
408        int32_t numerator=(ntv>>4)-12;
409        int32_t denominator=(ntv&0xf)+1;
410        return (double)numerator/denominator;
411    } else if(ntv<UPROPS_NTV_BASE60_START) {
412        /* large, single-significant-digit integer */
413        double numValue;
414        int32_t mant=(ntv>>5)-14;
415        int32_t exp=(ntv&0x1f)+2;
416        numValue=mant;
417
418        /* multiply by 10^exp without math.h */
419        while(exp>=4) {
420            numValue*=10000.;
421            exp-=4;
422        }
423        switch(exp) {
424        case 3:
425            numValue*=1000.;
426            break;
427        case 2:
428            numValue*=100.;
429            break;
430        case 1:
431            numValue*=10.;
432            break;
433        case 0:
434        default:
435            break;
436        }
437
438        return numValue;
439    } else if(ntv<UPROPS_NTV_RESERVED_START) {
440        /* sexagesimal (base 60) integer */
441        int32_t numValue=(ntv>>2)-0xbf;
442        int32_t exp=(ntv&3)+1;
443
444        switch(exp) {
445        case 4:
446            numValue*=60*60*60*60;
447            break;
448        case 3:
449            numValue*=60*60*60;
450            break;
451        case 2:
452            numValue*=60*60;
453            break;
454        case 1:
455            numValue*=60;
456            break;
457        case 0:
458        default:
459            break;
460        }
461
462        return numValue;
463    } else {
464        /* reserved */
465        return U_NO_NUMERIC_VALUE;
466    }
467}
468
469U_CAPI int32_t U_EXPORT2
470u_digit(UChar32 ch, int8_t radix) {
471    int8_t value;
472    if((uint8_t)(radix-2)<=(36-2)) {
473        value=(int8_t)u_charDigitValue(ch);
474        if(value<0) {
475            /* ch is not a decimal digit, try latin letters */
476            if(ch>=0x61 && ch<=0x7A) {
477                value=(int8_t)(ch-0x57);  /* ch - 'a' + 10 */
478            } else if(ch>=0x41 && ch<=0x5A) {
479                value=(int8_t)(ch-0x37);  /* ch - 'A' + 10 */
480            } else if(ch>=0xFF41 && ch<=0xFF5A) {
481                value=(int8_t)(ch-0xFF37);  /* fullwidth ASCII a-z */
482            } else if(ch>=0xFF21 && ch<=0xFF3A) {
483                value=(int8_t)(ch-0xFF17);  /* fullwidth ASCII A-Z */
484            }
485        }
486    } else {
487        value=-1;   /* invalid radix */
488    }
489    return (int8_t)((value<radix) ? value : -1);
490}
491
492U_CAPI UChar32 U_EXPORT2
493u_forDigit(int32_t digit, int8_t radix) {
494    if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) {
495        return 0;
496    } else if(digit<10) {
497        return (UChar32)(0x30+digit);
498    } else {
499        return (UChar32)((0x61-10)+digit);
500    }
501}
502
503/* miscellaneous, and support for uprops.cpp -------------------------------- */
504
505U_CAPI void U_EXPORT2
506u_getUnicodeVersion(UVersionInfo versionArray) {
507    if(versionArray!=NULL) {
508        uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH);
509    }
510}
511
512U_CFUNC uint32_t
513u_getMainProperties(UChar32 c) {
514    uint32_t props;
515    GET_PROPS(c, props);
516    return props;
517}
518
519U_CFUNC uint32_t
520u_getUnicodeProperties(UChar32 c, int32_t column) {
521    U_ASSERT(column>=0);
522    if(column>=propsVectorsColumns) {
523        return 0;
524    } else {
525        uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c);
526        return propsVectors[vecIndex+column];
527    }
528}
529
530U_CFUNC int32_t
531uprv_getMaxValues(int32_t column) {
532    switch(column) {
533    case 0:
534        return indexes[UPROPS_MAX_VALUES_INDEX];
535    case 2:
536        return indexes[UPROPS_MAX_VALUES_2_INDEX];
537    default:
538        return 0;
539    }
540}
541
542U_CAPI void U_EXPORT2
543u_charAge(UChar32 c, UVersionInfo versionArray) {
544    if(versionArray!=NULL) {
545        uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT;
546        versionArray[0]=(uint8_t)(version>>4);
547        versionArray[1]=(uint8_t)(version&0xf);
548        versionArray[2]=versionArray[3]=0;
549    }
550}
551
552U_CAPI UScriptCode U_EXPORT2
553uscript_getScript(UChar32 c, UErrorCode *pErrorCode) {
554    uint32_t scriptX;
555    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
556        return USCRIPT_INVALID_CODE;
557    }
558    if((uint32_t)c>0x10ffff) {
559        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
560        return USCRIPT_INVALID_CODE;
561    }
562    scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
563    if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
564        return (UScriptCode)scriptX;
565    } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) {
566        return USCRIPT_COMMON;
567    } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) {
568        return USCRIPT_INHERITED;
569    } else {
570        return (UScriptCode)scriptExtensions[scriptX&UPROPS_SCRIPT_MASK];
571    }
572}
573
574U_CAPI UBool U_EXPORT2
575uscript_hasScript(UChar32 c, UScriptCode sc) {
576    const uint16_t *scx;
577    uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
578    if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
579        return sc==(UScriptCode)scriptX;
580    }
581
582    scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK);
583    if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
584        scx=scriptExtensions+scx[1];
585    }
586    if(sc>=USCRIPT_CODE_LIMIT) {
587        /* Guard against bogus input that would make us go past the Script_Extensions terminator. */
588        return FALSE;
589    }
590    while(sc>*scx) {
591        ++scx;
592    }
593    return sc==(*scx&0x7fff);
594}
595
596U_CAPI int32_t U_EXPORT2
597uscript_getScriptExtensions(UChar32 c,
598                            UScriptCode *scripts, int32_t capacity,
599                            UErrorCode *pErrorCode) {
600    uint32_t scriptX;
601    int32_t length;
602    const uint16_t *scx;
603    uint16_t sx;
604    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
605        return 0;
606    }
607    if(capacity<0 || (capacity>0 && scripts==NULL)) {
608        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
609        return 0;
610    }
611    scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
612    if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
613        if(capacity==0) {
614            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
615        } else {
616            scripts[0]=(UScriptCode)scriptX;
617        }
618        return 1;
619    }
620
621    scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK);
622    if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
623        scx=scriptExtensions+scx[1];
624    }
625    length=0;
626    do {
627        sx=*scx++;
628        if(length<capacity) {
629            scripts[length]=(UScriptCode)(sx&0x7fff);
630        }
631        ++length;
632    } while(sx<0x8000);
633    if(length>capacity) {
634        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
635    }
636    return length;
637}
638
639U_CAPI UBlockCode U_EXPORT2
640ublock_getCode(UChar32 c) {
641    return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT);
642}
643
644/* property starts for UnicodeSet ------------------------------------------- */
645
646static UBool U_CALLCONV
647_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
648    /* add the start code point to the USet */
649    const USetAdder *sa=(const USetAdder *)context;
650    sa->add(sa->set, start);
651    return TRUE;
652}
653
/*
 * Adds cp and cp+1 to the set behind the USetAdder pointer sa.
 * Wrapped in do { } while(0) so that the two calls form one statement
 * (safe under an unbraced if/else); both arguments are evaluated twice.
 */
#define USET_ADD_CP_AND_NEXT(sa, cp) \
    do { \
        (sa)->add((sa)->set, (cp)); \
        (sa)->add((sa)->set, (cp)+1); \
    } while(0)
655
656U_CFUNC void U_EXPORT2
657uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
658    if(U_FAILURE(*pErrorCode)) {
659        return;
660    }
661
662    /* add the start code point of each same-value range of the main trie */
663    utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa);
664
665    /* add code points with hardcoded properties, plus the ones following them */
666
667    /* add for u_isblank() */
668    USET_ADD_CP_AND_NEXT(sa, TAB);
669
670    /* add for IS_THAT_CONTROL_SPACE() */
671    sa->add(sa->set, CR+1); /* range TAB..CR */
672    sa->add(sa->set, 0x1c);
673    sa->add(sa->set, 0x1f+1);
674    USET_ADD_CP_AND_NEXT(sa, NL);
675
676    /* add for u_isIDIgnorable() what was not added above */
677    sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
678    sa->add(sa->set, HAIRSP);
679    sa->add(sa->set, RLM+1);
680    sa->add(sa->set, INHSWAP);
681    sa->add(sa->set, NOMDIG+1);
682    USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
683
684    /* add no-break spaces for u_isWhitespace() what was not added above */
685    USET_ADD_CP_AND_NEXT(sa, NBSP);
686    USET_ADD_CP_AND_NEXT(sa, FIGURESP);
687    USET_ADD_CP_AND_NEXT(sa, NNBSP);
688
689    /* add for u_digit() */
690    sa->add(sa->set, U_a);
691    sa->add(sa->set, U_z+1);
692    sa->add(sa->set, U_A);
693    sa->add(sa->set, U_Z+1);
694    sa->add(sa->set, U_FW_a);
695    sa->add(sa->set, U_FW_z+1);
696    sa->add(sa->set, U_FW_A);
697    sa->add(sa->set, U_FW_Z+1);
698
699    /* add for u_isxdigit() */
700    sa->add(sa->set, U_f+1);
701    sa->add(sa->set, U_F+1);
702    sa->add(sa->set, U_FW_f+1);
703    sa->add(sa->set, U_FW_F+1);
704
705    /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
706    sa->add(sa->set, WJ); /* range WJ..NOMDIG */
707    sa->add(sa->set, 0xfff0);
708    sa->add(sa->set, 0xfffb+1);
709    sa->add(sa->set, 0xe0000);
710    sa->add(sa->set, 0xe0fff+1);
711
712    /* add for UCHAR_GRAPHEME_BASE and others */
713    USET_ADD_CP_AND_NEXT(sa, CGJ);
714}
715
716U_CFUNC void U_EXPORT2
717upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
718    if(U_FAILURE(*pErrorCode)) {
719        return;
720    }
721
722    /* add the start code point of each same-value range of the properties vectors trie */
723    if(propsVectorsColumns>0) {
724        /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */
725        utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
726    }
727}
728