1/*
2*******************************************************************************
3*
4*   Copyright (C) 2001-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  ustrcase.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2002feb20
14*   created by: Markus W. Scherer
15*
16*   Implementation file for string casing C API functions.
17*   Uses functions from uchar.c for basic functionality that requires access
18*   to the Unicode Character Database (uprops.dat).
19*/
20
21#include "unicode/utypes.h"
22#include "unicode/brkiter.h"
23#include "unicode/ustring.h"
24#include "unicode/ucasemap.h"
25#include "unicode/ubrk.h"
26#include "unicode/utf.h"
27#include "unicode/utf16.h"
28#include "cmemory.h"
29#include "ucase.h"
30#include "ustr_imp.h"
31
/* Number of elements of a real array (not a pointer), as an int32_t; fully parenthesized so it composes with any operator. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
33
34U_NAMESPACE_USE
35
36/* string casing ------------------------------------------------------------ */
37
38/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
39static inline int32_t
40appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
41             int32_t result, const UChar *s) {
42    UChar32 c;
43    int32_t length;
44
45    /* decode the result */
46    if(result<0) {
47        /* (not) original code point */
48        c=~result;
49        length=-1;
50    } else if(result<=UCASE_MAX_STRING_LENGTH) {
51        c=U_SENTINEL;
52        length=result;
53    } else {
54        c=result;
55        length=-1;
56    }
57
58    if(destIndex<destCapacity) {
59        /* append the result */
60        if(length<0) {
61            /* code point */
62            UBool isError=FALSE;
63            U16_APPEND(dest, destIndex, destCapacity, c, isError);
64            if(isError) {
65                /* overflow, nothing written */
66                destIndex+=U16_LENGTH(c);
67            }
68        } else {
69            /* string */
70            if((destIndex+length)<=destCapacity) {
71                while(length>0) {
72                    dest[destIndex++]=*s++;
73                    --length;
74                }
75            } else {
76                /* overflow */
77                destIndex+=length;
78            }
79        }
80    } else {
81        /* preflight */
82        if(length<0) {
83            destIndex+=U16_LENGTH(c);
84        } else {
85            destIndex+=length;
86        }
87    }
88    return destIndex;
89}
90
91static UChar32 U_CALLCONV
92utf16_caseContextIterator(void *context, int8_t dir) {
93    UCaseContext *csc=(UCaseContext *)context;
94    UChar32 c;
95
96    if(dir<0) {
97        /* reset for backward iteration */
98        csc->index=csc->cpStart;
99        csc->dir=dir;
100    } else if(dir>0) {
101        /* reset for forward iteration */
102        csc->index=csc->cpLimit;
103        csc->dir=dir;
104    } else {
105        /* continue current iteration direction */
106        dir=csc->dir;
107    }
108
109    if(dir<0) {
110        if(csc->start<csc->index) {
111            U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);
112            return c;
113        }
114    } else {
115        if(csc->index<csc->limit) {
116            U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);
117            return c;
118        }
119    }
120    return U_SENTINEL;
121}
122
123/*
124 * Case-maps [srcStart..srcLimit[ but takes
125 * context [0..srcLength[ into account.
126 */
127static int32_t
128_caseMap(const UCaseMap *csm, UCaseMapFull *map,
129         UChar *dest, int32_t destCapacity,
130         const UChar *src, UCaseContext *csc,
131         int32_t srcStart, int32_t srcLimit,
132         UErrorCode *pErrorCode) {
133    const UChar *s;
134    UChar32 c, c2 = 0;
135    int32_t srcIndex, destIndex;
136    int32_t locCache;
137
138    locCache=csm->locCache;
139
140    /* case mapping loop */
141    srcIndex=srcStart;
142    destIndex=0;
143    while(srcIndex<srcLimit) {
144        csc->cpStart=srcIndex;
145        U16_NEXT(src, srcIndex, srcLimit, c);
146        csc->cpLimit=srcIndex;
147        c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache);
148        if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
149            /* fast path version of appendResult() for BMP results */
150            dest[destIndex++]=(UChar)c2;
151        } else {
152            destIndex=appendResult(dest, destIndex, destCapacity, c, s);
153        }
154    }
155
156    if(destIndex>destCapacity) {
157        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
158    }
159    return destIndex;
160}
161
162#if !UCONFIG_NO_BREAK_ITERATION
163
164U_CFUNC int32_t U_CALLCONV
165ustrcase_internalToTitle(const UCaseMap *csm,
166                         UChar *dest, int32_t destCapacity,
167                         const UChar *src, int32_t srcLength,
168                         UErrorCode *pErrorCode) {
169    const UChar *s;
170    UChar32 c;
171    int32_t prev, titleStart, titleLimit, idx, destIndex, length;
172    UBool isFirstIndex;
173
174    if(U_FAILURE(*pErrorCode)) {
175        return 0;
176    }
177
178    // Use the C++ abstract base class to minimize dependencies.
179    // TODO: Change UCaseMap.iter to store a BreakIterator directly.
180    BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
181
182    /* set up local variables */
183    int32_t locCache=csm->locCache;
184    UCaseContext csc=UCASECONTEXT_INITIALIZER;
185    csc.p=(void *)src;
186    csc.limit=srcLength;
187    destIndex=0;
188    prev=0;
189    isFirstIndex=TRUE;
190
191    /* titlecasing loop */
192    while(prev<srcLength) {
193        /* find next index where to titlecase */
194        if(isFirstIndex) {
195            isFirstIndex=FALSE;
196            idx=bi->first();
197        } else {
198            idx=bi->next();
199        }
200        if(idx==UBRK_DONE || idx>srcLength) {
201            idx=srcLength;
202        }
203
204        /*
205         * Unicode 4 & 5 section 3.13 Default Case Operations:
206         *
207         * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
208         * #29, "Text Boundaries." Between each pair of word boundaries, find the first
209         * cased character F. If F exists, map F to default_title(F); then map each
210         * subsequent character C to default_lower(C).
211         *
212         * In this implementation, segment [prev..index[ into 3 parts:
213         * a) uncased characters (copy as-is) [prev..titleStart[
214         * b) first case letter (titlecase)         [titleStart..titleLimit[
215         * c) subsequent characters (lowercase)                 [titleLimit..index[
216         */
217        if(prev<idx) {
218            /* find and copy uncased characters [prev..titleStart[ */
219            titleStart=titleLimit=prev;
220            U16_NEXT(src, titleLimit, idx, c);
221            if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
222                /* Adjust the titlecasing index (titleStart) to the next cased character. */
223                for(;;) {
224                    titleStart=titleLimit;
225                    if(titleLimit==idx) {
226                        /*
227                         * only uncased characters in [prev..index[
228                         * stop with titleStart==titleLimit==index
229                         */
230                        break;
231                    }
232                    U16_NEXT(src, titleLimit, idx, c);
233                    if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
234                        break; /* cased letter at [titleStart..titleLimit[ */
235                    }
236                }
237                length=titleStart-prev;
238                if(length>0) {
239                    if((destIndex+length)<=destCapacity) {
240                        uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);
241                    }
242                    destIndex+=length;
243                }
244            }
245
246            if(titleStart<titleLimit) {
247                /* titlecase c which is from [titleStart..titleLimit[ */
248                csc.cpStart=titleStart;
249                csc.cpLimit=titleLimit;
250                c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache);
251                destIndex=appendResult(dest, destIndex, destCapacity, c, s);
252
253                /* Special case Dutch IJ titlecasing */
254                if ( titleStart+1 < idx &&
255                     ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH &&
256                     ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) &&
257                     ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) {
258                            c=(UChar32) 0x004A;
259                            destIndex=appendResult(dest, destIndex, destCapacity, c, s);
260                            titleLimit++;
261                }
262
263                /* lowercase [titleLimit..index[ */
264                if(titleLimit<idx) {
265                    if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
266                        /* Normal operation: Lowercase the rest of the word. */
267                        destIndex+=
268                            _caseMap(
269                                csm, ucase_toFullLower,
270                                dest+destIndex, destCapacity-destIndex,
271                                src, &csc,
272                                titleLimit, idx,
273                                pErrorCode);
274                    } else {
275                        /* Optionally just copy the rest of the word unchanged. */
276                        length=idx-titleLimit;
277                        if((destIndex+length)<=destCapacity) {
278                            uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR);
279                        }
280                        destIndex+=length;
281                    }
282                }
283            }
284        }
285
286        prev=idx;
287    }
288
289    if(destIndex>destCapacity) {
290        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
291    }
292    return destIndex;
293}
294
295#endif  // !UCONFIG_NO_BREAK_ITERATION
296
297/* functions available in the common library (for unistr_case.cpp) */
298
299U_CFUNC int32_t U_CALLCONV
300ustrcase_internalToLower(const UCaseMap *csm,
301                         UChar *dest, int32_t destCapacity,
302                         const UChar *src, int32_t srcLength,
303                         UErrorCode *pErrorCode) {
304    UCaseContext csc=UCASECONTEXT_INITIALIZER;
305    csc.p=(void *)src;
306    csc.limit=srcLength;
307    return _caseMap(
308        csm, ucase_toFullLower,
309        dest, destCapacity,
310        src, &csc, 0, srcLength,
311        pErrorCode);
312}
313
314U_CFUNC int32_t U_CALLCONV
315ustrcase_internalToUpper(const UCaseMap *csm,
316                         UChar *dest, int32_t destCapacity,
317                         const UChar *src, int32_t srcLength,
318                         UErrorCode *pErrorCode) {
319    UCaseContext csc=UCASECONTEXT_INITIALIZER;
320    csc.p=(void *)src;
321    csc.limit=srcLength;
322    return _caseMap(
323        csm, ucase_toFullUpper,
324        dest, destCapacity,
325        src, &csc, 0, srcLength,
326        pErrorCode);
327}
328
329static int32_t
330ustr_foldCase(const UCaseProps *csp,
331              UChar *dest, int32_t destCapacity,
332              const UChar *src, int32_t srcLength,
333              uint32_t options,
334              UErrorCode *pErrorCode) {
335    int32_t srcIndex, destIndex;
336
337    const UChar *s;
338    UChar32 c, c2 = 0;
339
340    /* case mapping loop */
341    srcIndex=destIndex=0;
342    while(srcIndex<srcLength) {
343        U16_NEXT(src, srcIndex, srcLength, c);
344        c=ucase_toFullFolding(csp, c, &s, options);
345        if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
346            /* fast path version of appendResult() for BMP results */
347            dest[destIndex++]=(UChar)c2;
348        } else {
349            destIndex=appendResult(dest, destIndex, destCapacity, c, s);
350        }
351    }
352
353    if(destIndex>destCapacity) {
354        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
355    }
356    return destIndex;
357}
358
359U_CFUNC int32_t U_CALLCONV
360ustrcase_internalFold(const UCaseMap *csm,
361                      UChar *dest, int32_t destCapacity,
362                      const UChar *src, int32_t srcLength,
363                      UErrorCode *pErrorCode) {
364    return ustr_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);
365}
366
367U_CFUNC int32_t
368ustrcase_map(const UCaseMap *csm,
369             UChar *dest, int32_t destCapacity,
370             const UChar *src, int32_t srcLength,
371             UStringCaseMapper *stringCaseMapper,
372             UErrorCode *pErrorCode) {
373    UChar buffer[300];
374    UChar *temp;
375
376    int32_t destLength;
377
378    /* check argument values */
379    if(U_FAILURE(*pErrorCode)) {
380        return 0;
381    }
382    if( destCapacity<0 ||
383        (dest==NULL && destCapacity>0) ||
384        src==NULL ||
385        srcLength<-1
386    ) {
387        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
388        return 0;
389    }
390
391    /* get the string length */
392    if(srcLength==-1) {
393        srcLength=u_strlen(src);
394    }
395
396    /* check for overlapping source and destination */
397    if( dest!=NULL &&
398        ((src>=dest && src<(dest+destCapacity)) ||
399         (dest>=src && dest<(src+srcLength)))
400    ) {
401        /* overlap: provide a temporary destination buffer and later copy the result */
402        if(destCapacity<=LENGTHOF(buffer)) {
403            /* the stack buffer is large enough */
404            temp=buffer;
405        } else {
406            /* allocate a buffer */
407            temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
408            if(temp==NULL) {
409                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
410                return 0;
411            }
412        }
413    } else {
414        temp=dest;
415    }
416
417    destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode);
418    if(temp!=dest) {
419        /* copy the result string to the destination buffer */
420        if(destLength>0) {
421            int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity;
422            if(copyLength>0) {
423                uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR);
424            }
425        }
426        if(temp!=buffer) {
427            uprv_free(temp);
428        }
429    }
430
431    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
432}
433
434/* public API functions */
435
436U_CAPI int32_t U_EXPORT2
437u_strFoldCase(UChar *dest, int32_t destCapacity,
438              const UChar *src, int32_t srcLength,
439              uint32_t options,
440              UErrorCode *pErrorCode) {
441    UCaseMap csm=UCASEMAP_INITIALIZER;
442    csm.csp=ucase_getSingleton();
443    csm.options=options;
444    return ustrcase_map(
445        &csm,
446        dest, destCapacity,
447        src, srcLength,
448        ustrcase_internalFold, pErrorCode);
449}
450
451/* case-insensitive string comparisons -------------------------------------- */
452
453/*
454 * This function is a copy of unorm_cmpEquivFold() minus the parts for
455 * canonical equivalence.
456 * Keep the functions in sync, and see there for how this works.
457 * The duplication is for modularization:
458 * It makes caseless (but not canonical caseless) matches independent of
459 * the normalization code.
460 */
461
462/* stack element for previous-level source/decomposition pointers */
463struct CmpEquivLevel {
464    const UChar *start, *s, *limit;
465};
466typedef struct CmpEquivLevel CmpEquivLevel;
467
468/* internal function */
/*
 * Compares s1 and s2 code unit by code unit. When two code units differ, the
 * code point on each side is case-folded with ucase_toFullFolding() (at most
 * once per position, guarded by level1/level2) and the comparison continues
 * on the folding result held in fold1[]/fold2[].
 *
 * length1/length2: number of UChars, or -1 for a NUL-terminated string.
 * options: passed to ucase_toFullFolding(); additionally
 *   _STRNCMP_STYLE makes a NUL code unit end a string even when a length was given,
 *   U_COMPARE_CODE_POINT_ORDER adjusts the final difference for surrogates.
 *
 * Returns 0 if the strings compare equal, -1 if string 1 ends first, 1 if
 * string 2 ends first, otherwise the difference c1-c2 of the first code
 * units that still differ after folding.
 * Returns 0 immediately if *pErrorCode indicates a failure on entry.
 */
469U_CFUNC int32_t
470u_strcmpFold(const UChar *s1, int32_t length1,
471             const UChar *s2, int32_t length2,
472             uint32_t options,
473             UErrorCode *pErrorCode) {
474    const UCaseProps *csp;
475
476    /* current-level start/limit - s1/s2 as current */
477    const UChar *start1, *start2, *limit1, *limit2;
478
479    /* case folding variables */
480    const UChar *p;
481    int32_t length;
482
483    /* stacks of previous-level start/current/limit */
484    CmpEquivLevel stack1[2], stack2[2];
485
486    /* case folding buffers, only use current-level start/limit */
487    UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
488
489    /* track which is the current level per string */
490    int32_t level1, level2;
491
492    /* current code units, and code points for lookups */
493    UChar32 c1, c2, cp1, cp2;
494
495    /* no argument error checking because this itself is not an API */
496
497    /*
498     * assume that at least the option U_COMPARE_IGNORE_CASE is set
499     * otherwise this function would have to behave exactly as uprv_strCompare()
500     */
501    csp=ucase_getSingleton();
502    if(U_FAILURE(*pErrorCode)) {
503        return 0;
504    }
505
506    /* initialize */
    /* a NULL limit marks a NUL-terminated string */
507    start1=s1;
508    if(length1==-1) {
509        limit1=NULL;
510    } else {
511        limit1=s1+length1;
512    }
513
514    start2=s2;
515    if(length2==-1) {
516        limit2=NULL;
517    } else {
518        limit2=s2+length2;
519    }
520
521    level1=level2=0;
522    c1=c2=-1;
523
524    /* comparison loop */
525    for(;;) {
526        /*
527         * here a code unit value of -1 means "get another code unit"
528         * below it will mean "this source is finished"
529         */
530
531        if(c1<0) {
532            /* get next code unit from string 1, post-increment */
533            for(;;) {
                /*
                 * the current level ends at the limit pointer, or at a NUL code unit
                 * if the text is NUL-terminated or _STRNCMP_STYLE is set
                 */
534                if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
535                    if(level1==0) {
536                        c1=-1;
537                        break;
538                    }
539                } else {
540                    ++s1;
541                    break;
542                }
543
544                /* reached end of level buffer, pop one level */
545                do {
546                    --level1;
547                    start1=stack1[level1].start;    /*Not uninitialized*/
548                } while(start1==NULL);
549                s1=stack1[level1].s;                /*Not uninitialized*/
550                limit1=stack1[level1].limit;        /*Not uninitialized*/
551            }
552        }
553
554        if(c2<0) {
555            /* get next code unit from string 2, post-increment */
556            for(;;) {
                /* same end-of-level test as for string 1 */
557                if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
558                    if(level2==0) {
559                        c2=-1;
560                        break;
561                    }
562                } else {
563                    ++s2;
564                    break;
565                }
566
567                /* reached end of level buffer, pop one level */
568                do {
569                    --level2;
570                    start2=stack2[level2].start;    /*Not uninitialized*/
571                } while(start2==NULL);
572                s2=stack2[level2].s;                /*Not uninitialized*/
573                limit2=stack2[level2].limit;        /*Not uninitialized*/
574            }
575        }
576
577        /*
578         * compare c1 and c2
579         * either variable c1, c2 is -1 only if the corresponding string is finished
580         */
581        if(c1==c2) {
582            if(c1<0) {
583                return 0;   /* c1==c2==-1 indicating end of strings */
584            }
585            c1=c2=-1;       /* make us fetch new code units */
586            continue;
587        } else if(c1<0) {
588            return -1;      /* string 1 ends before string 2 */
589        } else if(c2<0) {
590            return 1;       /* string 2 ends before string 1 */
591        }
592        /* c1!=c2 && c1>=0 && c2>=0 */
593
594        /* get complete code points for c1, c2 for lookups if either is a surrogate */
595        cp1=c1;
596        if(U_IS_SURROGATE(c1)) {
597            UChar c;
598
599            if(U_IS_SURROGATE_LEAD(c1)) {
600                if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
601                    /* advance ++s1; only below if cp1 decomposes/case-folds */
602                    cp1=U16_GET_SUPPLEMENTARY(c1, c);
603                }
604            } else /* isTrail(c1) */ {
                /* s1 was already post-incremented, so the preceding code unit is at s1-2 */
605                if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
606                    cp1=U16_GET_SUPPLEMENTARY(c, c1);
607                }
608            }
609        }
610
611        cp2=c2;
612        if(U_IS_SURROGATE(c2)) {
613            UChar c;
614
615            if(U_IS_SURROGATE_LEAD(c2)) {
616                if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
617                    /* advance ++s2; only below if cp2 decomposes/case-folds */
618                    cp2=U16_GET_SUPPLEMENTARY(c2, c);
619                }
620            } else /* isTrail(c2) */ {
                /* s2 was already post-incremented, so the preceding code unit is at s2-2 */
621                if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
622                    cp2=U16_GET_SUPPLEMENTARY(c, c2);
623                }
624            }
625        }
626
627        /*
628         * go down one level for each string
629         * continue with the main loop as soon as there is a real change
630         */
631
        /* fold only source text (level 0), never the contents of a folding buffer */
632        if( level1==0 &&
633            (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
634        ) {
635            /* cp1 case-folds to the code point "length" or to p[length] */
636            if(U_IS_SURROGATE(c1)) {
637                if(U_IS_SURROGATE_LEAD(c1)) {
638                    /* advance beyond source surrogate pair if it case-folds */
639                    ++s1;
640                } else /* isTrail(c1) */ {
641                    /*
642                     * we got a supplementary code point when hitting its trail surrogate,
643                     * therefore the lead surrogate must have been the same as in the other string;
644                     * compare this decomposition with the lead surrogate in the other string
645                     * remember that this simulates bulk text replacement:
646                     * the decomposition would replace the entire code point
647                     */
648                    --s2;
649                    c2=*(s2-1);
650                }
651            }
652
653            /* push current level pointers */
654            stack1[0].start=start1;
655            stack1[0].s=s1;
656            stack1[0].limit=limit1;
657            ++level1;
658
659            /* copy the folding result to fold1[] */
            /* a value up to UCASE_MAX_STRING_LENGTH is a string length, a larger one is a code point */
660            if(length<=UCASE_MAX_STRING_LENGTH) {
661                u_memcpy(fold1, p, length);
662            } else {
663                int32_t i=0;
664                U16_APPEND_UNSAFE(fold1, i, length);
665                length=i;
666            }
667
668            /* set next level pointers to case folding */
669            start1=s1=fold1;
670            limit1=fold1+length;
671
672            /* get ready to read from decomposition, continue with loop */
673            c1=-1;
674            continue;
675        }
676
        /* same as above, for string 2 */
677        if( level2==0 &&
678            (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
679        ) {
680            /* cp2 case-folds to the code point "length" or to p[length] */
681            if(U_IS_SURROGATE(c2)) {
682                if(U_IS_SURROGATE_LEAD(c2)) {
683                    /* advance beyond source surrogate pair if it case-folds */
684                    ++s2;
685                } else /* isTrail(c2) */ {
686                    /*
687                     * we got a supplementary code point when hitting its trail surrogate,
688                     * therefore the lead surrogate must have been the same as in the other string;
689                     * compare this decomposition with the lead surrogate in the other string
690                     * remember that this simulates bulk text replacement:
691                     * the decomposition would replace the entire code point
692                     */
693                    --s1;
694                    c1=*(s1-1);
695                }
696            }
697
698            /* push current level pointers */
699            stack2[0].start=start2;
700            stack2[0].s=s2;
701            stack2[0].limit=limit2;
702            ++level2;
703
704            /* copy the folding result to fold2[] */
            /* a value up to UCASE_MAX_STRING_LENGTH is a string length, a larger one is a code point */
705            if(length<=UCASE_MAX_STRING_LENGTH) {
706                u_memcpy(fold2, p, length);
707            } else {
708                int32_t i=0;
709                U16_APPEND_UNSAFE(fold2, i, length);
710                length=i;
711            }
712
713            /* set next level pointers to case folding */
714            start2=s2=fold2;
715            limit2=fold2+length;
716
717            /* get ready to read from decomposition, continue with loop */
718            c2=-1;
719            continue;
720        }
721
722        /*
723         * no decomposition/case folding, max level for both sides:
724         * return difference result
725         *
726         * code point order comparison must not just return cp1-cp2
727         * because when single surrogates are present then the surrogate pairs
728         * that formed cp1 and cp2 may be from different string indexes
729         *
730         * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
731         * c1=d800 cp1=10001 c2=dc00 cp2=10000
732         * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
733         *
734         * therefore, use same fix-up as in ustring.c/uprv_strCompare()
735         * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
736         * so we have slightly different pointer/start/limit comparisons here
737         */
738
739        if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
740            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
741            if(
742                (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
743                (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
744            ) {
745                /* part of a surrogate pair, leave >=d800 */
746            } else {
747                /* BMP code point - may be surrogate code point - make <d800 */
748                c1-=0x2800;
749            }
750
751            if(
752                (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
753                (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
754            ) {
755                /* part of a surrogate pair, leave >=d800 */
756            } else {
757                /* BMP code point - may be surrogate code point - make <d800 */
758                c2-=0x2800;
759            }
760        }
761
762        return c1-c2;
763    }
764}
765
766/* public API functions */
767
768U_CAPI int32_t U_EXPORT2
769u_strCaseCompare(const UChar *s1, int32_t length1,
770                 const UChar *s2, int32_t length2,
771                 uint32_t options,
772                 UErrorCode *pErrorCode) {
773    /* argument checking */
774    if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
775        return 0;
776    }
777    if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
778        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
779        return 0;
780    }
781    return u_strcmpFold(s1, length1, s2, length2,
782                        options|U_COMPARE_IGNORE_CASE,
783                        pErrorCode);
784}
785
786U_CAPI int32_t U_EXPORT2
787u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
788    UErrorCode errorCode=U_ZERO_ERROR;
789    return u_strcmpFold(s1, -1, s2, -1,
790                        options|U_COMPARE_IGNORE_CASE,
791                        &errorCode);
792}
793
794U_CAPI int32_t U_EXPORT2
795u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
796    UErrorCode errorCode=U_ZERO_ERROR;
797    return u_strcmpFold(s1, length, s2, length,
798                        options|U_COMPARE_IGNORE_CASE,
799                        &errorCode);
800}
801
802U_CAPI int32_t U_EXPORT2
803u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
804    UErrorCode errorCode=U_ZERO_ERROR;
805    return u_strcmpFold(s1, n, s2, n,
806                        options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),
807                        &errorCode);
808}
809