1/*
2*******************************************************************************
3*
4*   Copyright (C) 2001-2010, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  ustrcase.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2002feb20
14*   created by: Markus W. Scherer
15*
16*   Implementation file for string casing C API functions.
17*   Uses functions from uchar.c for basic functionality that requires access
18*   to the Unicode Character Database (uprops.dat).
19*/
20
21#include "unicode/utypes.h"
22#include "unicode/uloc.h"
23#include "unicode/ustring.h"
24#include "unicode/ucasemap.h"
25#include "unicode/ubrk.h"
26#include "cmemory.h"
27#include "ucase.h"
28#include "ustr_imp.h"
29
30/* string casing ------------------------------------------------------------ */
31
32/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
33static U_INLINE int32_t
34appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
35             int32_t result, const UChar *s) {
36    UChar32 c;
37    int32_t length;
38
39    /* decode the result */
40    if(result<0) {
41        /* (not) original code point */
42        c=~result;
43        length=-1;
44    } else if(result<=UCASE_MAX_STRING_LENGTH) {
45        c=U_SENTINEL;
46        length=result;
47    } else {
48        c=result;
49        length=-1;
50    }
51
52    if(destIndex<destCapacity) {
53        /* append the result */
54        if(length<0) {
55            /* code point */
56            UBool isError=FALSE;
57            U16_APPEND(dest, destIndex, destCapacity, c, isError);
58            if(isError) {
59                /* overflow, nothing written */
60                destIndex+=U16_LENGTH(c);
61            }
62        } else {
63            /* string */
64            if((destIndex+length)<=destCapacity) {
65                while(length>0) {
66                    dest[destIndex++]=*s++;
67                    --length;
68                }
69            } else {
70                /* overflow */
71                destIndex+=length;
72            }
73        }
74    } else {
75        /* preflight */
76        if(length<0) {
77            destIndex+=U16_LENGTH(c);
78        } else {
79            destIndex+=length;
80        }
81    }
82    return destIndex;
83}
84
85static UChar32 U_CALLCONV
86utf16_caseContextIterator(void *context, int8_t dir) {
87    UCaseContext *csc=(UCaseContext *)context;
88    UChar32 c;
89
90    if(dir<0) {
91        /* reset for backward iteration */
92        csc->index=csc->cpStart;
93        csc->dir=dir;
94    } else if(dir>0) {
95        /* reset for forward iteration */
96        csc->index=csc->cpLimit;
97        csc->dir=dir;
98    } else {
99        /* continue current iteration direction */
100        dir=csc->dir;
101    }
102
103    if(dir<0) {
104        if(csc->start<csc->index) {
105            U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);
106            return c;
107        }
108    } else {
109        if(csc->index<csc->limit) {
110            U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);
111            return c;
112        }
113    }
114    return U_SENTINEL;
115}
116
117/*
118 * Case-maps [srcStart..srcLimit[ but takes
119 * context [0..srcLength[ into account.
120 */
121static int32_t
122_caseMap(const UCaseMap *csm, UCaseMapFull *map,
123         UChar *dest, int32_t destCapacity,
124         const UChar *src, UCaseContext *csc,
125         int32_t srcStart, int32_t srcLimit,
126         UErrorCode *pErrorCode) {
127    const UChar *s;
128    UChar32 c, c2 = 0;
129    int32_t srcIndex, destIndex;
130    int32_t locCache;
131
132    locCache=csm->locCache;
133
134    /* case mapping loop */
135    srcIndex=srcStart;
136    destIndex=0;
137    while(srcIndex<srcLimit) {
138        csc->cpStart=srcIndex;
139        U16_NEXT(src, srcIndex, srcLimit, c);
140        csc->cpLimit=srcIndex;
141        c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache);
142        if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
143            /* fast path version of appendResult() for BMP results */
144            dest[destIndex++]=(UChar)c2;
145        } else {
146            destIndex=appendResult(dest, destIndex, destCapacity, c, s);
147        }
148    }
149
150    if(destIndex>destCapacity) {
151        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
152    }
153    return destIndex;
154}
155
156static void
157setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
158    /*
159     * We could call ucasemap_setLocale(), but here we really only care about
160     * the initial language subtag, we need not return the real string via
161     * ucasemap_getLocale(), and we don't care about only getting "x" from
162     * "x-some-thing" etc.
163     *
164     * We ignore locales with a longer-than-3 initial subtag.
165     *
166     * We also do not fill in the locCache because it is rarely used,
167     * and not worth setting unless we reuse it for many case mapping operations.
168     * (That's why UCaseMap was created.)
169     */
170    int i;
171    char c;
172
173    /* the internal functions require locale!=NULL */
174    if(locale==NULL) {
175        locale=uloc_getDefault();
176    }
177    for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) {
178        csm->locale[i]=c;
179    }
180    if(i<=3) {
181        csm->locale[i]=0;  /* Up to 3 non-separator characters. */
182    } else {
183        csm->locale[0]=0;  /* Longer-than-3 initial subtag: Ignore. */
184    }
185}
186
187/*
188 * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
189 * Do this fast because it is called with every function call.
190 */
191static U_INLINE void
192setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
193    if(csm->csp==NULL) {
194        csm->csp=ucase_getSingleton();
195    }
196    if(locale!=NULL && locale[0]==0) {
197        csm->locale[0]=0;
198    } else {
199        setTempCaseMapLocale(csm, locale, pErrorCode);
200    }
201}
202
203#if !UCONFIG_NO_BREAK_ITERATION
204
205/*
206 * Internal titlecasing function.
207 */
208static int32_t
209_toTitle(UCaseMap *csm,
210         UChar *dest, int32_t destCapacity,
211         const UChar *src, UCaseContext *csc,
212         int32_t srcLength,
213         UErrorCode *pErrorCode) {
214    const UChar *s;
215    UChar32 c;
216    int32_t prev, titleStart, titleLimit, idx, destIndex, length;
217    UBool isFirstIndex;
218
219    if(csm->iter!=NULL) {
220        ubrk_setText(csm->iter, src, srcLength, pErrorCode);
221    } else {
222        csm->iter=ubrk_open(UBRK_WORD, csm->locale,
223                            src, srcLength,
224                            pErrorCode);
225    }
226    if(U_FAILURE(*pErrorCode)) {
227        return 0;
228    }
229
230    /* set up local variables */
231    destIndex=0;
232    prev=0;
233    isFirstIndex=TRUE;
234
235    /* titlecasing loop */
236    while(prev<srcLength) {
237        /* find next index where to titlecase */
238        if(isFirstIndex) {
239            isFirstIndex=FALSE;
240            idx=ubrk_first(csm->iter);
241        } else {
242            idx=ubrk_next(csm->iter);
243        }
244        if(idx==UBRK_DONE || idx>srcLength) {
245            idx=srcLength;
246        }
247
248        /*
249         * Unicode 4 & 5 section 3.13 Default Case Operations:
250         *
251         * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
252         * #29, "Text Boundaries." Between each pair of word boundaries, find the first
253         * cased character F. If F exists, map F to default_title(F); then map each
254         * subsequent character C to default_lower(C).
255         *
256         * In this implementation, segment [prev..index[ into 3 parts:
257         * a) uncased characters (copy as-is) [prev..titleStart[
258         * b) first case letter (titlecase)         [titleStart..titleLimit[
259         * c) subsequent characters (lowercase)                 [titleLimit..index[
260         */
261        if(prev<idx) {
262            /* find and copy uncased characters [prev..titleStart[ */
263            titleStart=titleLimit=prev;
264            U16_NEXT(src, titleLimit, idx, c);
265            if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
266                /* Adjust the titlecasing index (titleStart) to the next cased character. */
267                for(;;) {
268                    titleStart=titleLimit;
269                    if(titleLimit==idx) {
270                        /*
271                         * only uncased characters in [prev..index[
272                         * stop with titleStart==titleLimit==index
273                         */
274                        break;
275                    }
276                    U16_NEXT(src, titleLimit, idx, c);
277                    if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
278                        break; /* cased letter at [titleStart..titleLimit[ */
279                    }
280                }
281                length=titleStart-prev;
282                if(length>0) {
283                    if((destIndex+length)<=destCapacity) {
284                        uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);
285                    }
286                    destIndex+=length;
287                }
288            }
289
290            if(titleStart<titleLimit) {
291                /* titlecase c which is from [titleStart..titleLimit[ */
292                csc->cpStart=titleStart;
293                csc->cpLimit=titleLimit;
294                c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
295                destIndex=appendResult(dest, destIndex, destCapacity, c, s);
296
297                /* Special case Dutch IJ titlecasing */
298                if ( titleStart+1 < idx &&
299                     ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH &&
300                     ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) &&
301                     ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) {
302                            c=(UChar32) 0x004A;
303                            destIndex=appendResult(dest, destIndex, destCapacity, c, s);
304                            titleLimit++;
305                }
306
307                /* lowercase [titleLimit..index[ */
308                if(titleLimit<idx) {
309                    if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
310                        /* Normal operation: Lowercase the rest of the word. */
311                        destIndex+=
312                            _caseMap(
313                                csm, ucase_toFullLower,
314                                dest+destIndex, destCapacity-destIndex,
315                                src, csc,
316                                titleLimit, idx,
317                                pErrorCode);
318                    } else {
319                        /* Optionally just copy the rest of the word unchanged. */
320                        length=idx-titleLimit;
321                        if((destIndex+length)<=destCapacity) {
322                            uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR);
323                        }
324                        destIndex+=length;
325                    }
326                }
327            }
328        }
329
330        prev=idx;
331    }
332
333    if(destIndex>destCapacity) {
334        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
335    }
336    return destIndex;
337}
338
339#endif
340
341/* functions available in the common library (for unistr_case.cpp) */
342
343U_CFUNC int32_t
344ustr_toLower(const UCaseProps *csp,
345             UChar *dest, int32_t destCapacity,
346             const UChar *src, int32_t srcLength,
347             const char *locale,
348             UErrorCode *pErrorCode) {
349    UCaseMap csm={ NULL };
350    UCaseContext csc={ NULL };
351
352    csm.csp=csp;
353    setTempCaseMap(&csm, locale, pErrorCode);
354    csc.p=(void *)src;
355    csc.limit=srcLength;
356
357    return _caseMap(&csm, ucase_toFullLower,
358                    dest, destCapacity,
359                    src, &csc, 0, srcLength,
360                    pErrorCode);
361}
362
363U_CFUNC int32_t
364ustr_toUpper(const UCaseProps *csp,
365             UChar *dest, int32_t destCapacity,
366             const UChar *src, int32_t srcLength,
367             const char *locale,
368             UErrorCode *pErrorCode) {
369    UCaseMap csm={ NULL };
370    UCaseContext csc={ NULL };
371
372    csm.csp=csp;
373    setTempCaseMap(&csm, locale, pErrorCode);
374    csc.p=(void *)src;
375    csc.limit=srcLength;
376
377    return _caseMap(&csm, ucase_toFullUpper,
378                    dest, destCapacity,
379                    src, &csc, 0, srcLength,
380                    pErrorCode);
381}
382
383#if !UCONFIG_NO_BREAK_ITERATION
384
385U_CFUNC int32_t
386ustr_toTitle(const UCaseProps *csp,
387             UChar *dest, int32_t destCapacity,
388             const UChar *src, int32_t srcLength,
389             UBreakIterator *titleIter,
390             const char *locale, uint32_t options,
391             UErrorCode *pErrorCode) {
392    UCaseMap csm={ NULL };
393    UCaseContext csc={ NULL };
394    int32_t length;
395
396    csm.csp=csp;
397    csm.iter=titleIter;
398    csm.options=options;
399    setTempCaseMap(&csm, locale, pErrorCode);
400    csc.p=(void *)src;
401    csc.limit=srcLength;
402
403    length=_toTitle(&csm,
404                    dest, destCapacity,
405                    src, &csc, srcLength,
406                    pErrorCode);
407    if(titleIter==NULL && csm.iter!=NULL) {
408        ubrk_close(csm.iter);
409    }
410    return length;
411}
412
413#endif
414
415U_CFUNC int32_t
416ustr_foldCase(const UCaseProps *csp,
417              UChar *dest, int32_t destCapacity,
418              const UChar *src, int32_t srcLength,
419              uint32_t options,
420              UErrorCode *pErrorCode) {
421    int32_t srcIndex, destIndex;
422
423    const UChar *s;
424    UChar32 c, c2 = 0;
425
426    /* case mapping loop */
427    srcIndex=destIndex=0;
428    while(srcIndex<srcLength) {
429        U16_NEXT(src, srcIndex, srcLength, c);
430        c=ucase_toFullFolding(csp, c, &s, options);
431        if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
432            /* fast path version of appendResult() for BMP results */
433            dest[destIndex++]=(UChar)c2;
434        } else {
435            destIndex=appendResult(dest, destIndex, destCapacity, c, s);
436        }
437    }
438
439    if(destIndex>destCapacity) {
440        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
441    }
442    return destIndex;
443}
444
445/*
446 * Implement argument checking and buffer handling
447 * for string case mapping as a common function.
448 */
449
450/* common internal function for public API functions */
451
452static int32_t
453caseMap(const UCaseMap *csm,
454        UChar *dest, int32_t destCapacity,
455        const UChar *src, int32_t srcLength,
456        int32_t toWhichCase,
457        UErrorCode *pErrorCode) {
458    UChar buffer[300];
459    UChar *temp;
460
461    int32_t destLength;
462
463    /* check argument values */
464    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
465        return 0;
466    }
467    if( destCapacity<0 ||
468        (dest==NULL && destCapacity>0) ||
469        src==NULL ||
470        srcLength<-1
471    ) {
472        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
473        return 0;
474    }
475
476    /* get the string length */
477    if(srcLength==-1) {
478        srcLength=u_strlen(src);
479    }
480
481    /* check for overlapping source and destination */
482    if( dest!=NULL &&
483        ((src>=dest && src<(dest+destCapacity)) ||
484         (dest>=src && dest<(src+srcLength)))
485    ) {
486        /* overlap: provide a temporary destination buffer and later copy the result */
487        if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) {
488            /* the stack buffer is large enough */
489            temp=buffer;
490        } else {
491            /* allocate a buffer */
492            temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
493            if(temp==NULL) {
494                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
495                return 0;
496            }
497        }
498    } else {
499        temp=dest;
500    }
501
502    destLength=0;
503
504    if(toWhichCase==FOLD_CASE) {
505        destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength,
506                                 csm->options, pErrorCode);
507    } else {
508        UCaseContext csc={ NULL };
509
510        csc.p=(void *)src;
511        csc.limit=srcLength;
512
513        if(toWhichCase==TO_LOWER) {
514            destLength=_caseMap(csm, ucase_toFullLower,
515                                temp, destCapacity,
516                                src, &csc,
517                                0, srcLength,
518                                pErrorCode);
519        } else if(toWhichCase==TO_UPPER) {
520            destLength=_caseMap(csm, ucase_toFullUpper,
521                                temp, destCapacity,
522                                src, &csc,
523                                0, srcLength,
524                                pErrorCode);
525        } else /* if(toWhichCase==TO_TITLE) */ {
526#if UCONFIG_NO_BREAK_ITERATION
527            *pErrorCode=U_UNSUPPORTED_ERROR;
528#else
529            /* UCaseMap is actually non-const in toTitle() APIs. */
530            destLength=_toTitle((UCaseMap *)csm, temp, destCapacity,
531                                src, &csc, srcLength,
532                                pErrorCode);
533#endif
534        }
535    }
536    if(temp!=dest) {
537        /* copy the result string to the destination buffer */
538        if(destLength>0) {
539            int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity;
540            if(copyLength>0) {
541                uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR);
542            }
543        }
544        if(temp!=buffer) {
545            uprv_free(temp);
546        }
547    }
548
549    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
550}
551
552/* public API functions */
553
554U_CAPI int32_t U_EXPORT2
555u_strToLower(UChar *dest, int32_t destCapacity,
556             const UChar *src, int32_t srcLength,
557             const char *locale,
558             UErrorCode *pErrorCode) {
559    UCaseMap csm={ NULL };
560    setTempCaseMap(&csm, locale, pErrorCode);
561    return caseMap(&csm,
562                   dest, destCapacity,
563                   src, srcLength,
564                   TO_LOWER, pErrorCode);
565}
566
567U_CAPI int32_t U_EXPORT2
568u_strToUpper(UChar *dest, int32_t destCapacity,
569             const UChar *src, int32_t srcLength,
570             const char *locale,
571             UErrorCode *pErrorCode) {
572    UCaseMap csm={ NULL };
573    setTempCaseMap(&csm, locale, pErrorCode);
574    return caseMap(&csm,
575                   dest, destCapacity,
576                   src, srcLength,
577                   TO_UPPER, pErrorCode);
578}
579
580#if !UCONFIG_NO_BREAK_ITERATION
581
582U_CAPI int32_t U_EXPORT2
583u_strToTitle(UChar *dest, int32_t destCapacity,
584             const UChar *src, int32_t srcLength,
585             UBreakIterator *titleIter,
586             const char *locale,
587             UErrorCode *pErrorCode) {
588    UCaseMap csm={ NULL };
589    int32_t length;
590
591    csm.iter=titleIter;
592    setTempCaseMap(&csm, locale, pErrorCode);
593    length=caseMap(&csm,
594                   dest, destCapacity,
595                   src, srcLength,
596                   TO_TITLE, pErrorCode);
597    if(titleIter==NULL && csm.iter!=NULL) {
598        ubrk_close(csm.iter);
599    }
600    return length;
601}
602
603U_CAPI int32_t U_EXPORT2
604ucasemap_toTitle(UCaseMap *csm,
605                 UChar *dest, int32_t destCapacity,
606                 const UChar *src, int32_t srcLength,
607                 UErrorCode *pErrorCode) {
608    return caseMap(csm,
609                   dest, destCapacity,
610                   src, srcLength,
611                   TO_TITLE, pErrorCode);
612}
613
614#endif
615
616U_CAPI int32_t U_EXPORT2
617u_strFoldCase(UChar *dest, int32_t destCapacity,
618              const UChar *src, int32_t srcLength,
619              uint32_t options,
620              UErrorCode *pErrorCode) {
621    UCaseMap csm={ NULL };
622    csm.csp=ucase_getSingleton();
623    csm.options=options;
624    return caseMap(&csm,
625                   dest, destCapacity,
626                   src, srcLength,
627                   FOLD_CASE, pErrorCode);
628}
629
630/* case-insensitive string comparisons -------------------------------------- */
631
632/*
633 * This function is a copy of unorm_cmpEquivFold() minus the parts for
634 * canonical equivalence.
635 * Keep the functions in sync, and see there for how this works.
636 * The duplication is for modularization:
637 * It makes caseless (but not canonical caseless) matches independent of
638 * the normalization code.
639 */
640
641/* stack element for previous-level source/decomposition pointers */
642struct CmpEquivLevel {
643    const UChar *start, *s, *limit;
644};
645typedef struct CmpEquivLevel CmpEquivLevel;
646
647/* internal function */
648U_CFUNC int32_t
649u_strcmpFold(const UChar *s1, int32_t length1,
650             const UChar *s2, int32_t length2,
651             uint32_t options,
652             UErrorCode *pErrorCode) {
653    const UCaseProps *csp;
654
655    /* current-level start/limit - s1/s2 as current */
656    const UChar *start1, *start2, *limit1, *limit2;
657
658    /* case folding variables */
659    const UChar *p;
660    int32_t length;
661
662    /* stacks of previous-level start/current/limit */
663    CmpEquivLevel stack1[2], stack2[2];
664
665    /* case folding buffers, only use current-level start/limit */
666    UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
667
668    /* track which is the current level per string */
669    int32_t level1, level2;
670
671    /* current code units, and code points for lookups */
672    UChar32 c1, c2, cp1, cp2;
673
674    /* no argument error checking because this itself is not an API */
675
676    /*
677     * assume that at least the option U_COMPARE_IGNORE_CASE is set
678     * otherwise this function would have to behave exactly as uprv_strCompare()
679     */
680    csp=ucase_getSingleton();
681    if(U_FAILURE(*pErrorCode)) {
682        return 0;
683    }
684
685    /* initialize */
686    start1=s1;
687    if(length1==-1) {
688        limit1=NULL;
689    } else {
690        limit1=s1+length1;
691    }
692
693    start2=s2;
694    if(length2==-1) {
695        limit2=NULL;
696    } else {
697        limit2=s2+length2;
698    }
699
700    level1=level2=0;
701    c1=c2=-1;
702
703    /* comparison loop */
704    for(;;) {
705        /*
706         * here a code unit value of -1 means "get another code unit"
707         * below it will mean "this source is finished"
708         */
709
710        if(c1<0) {
711            /* get next code unit from string 1, post-increment */
712            for(;;) {
713                if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
714                    if(level1==0) {
715                        c1=-1;
716                        break;
717                    }
718                } else {
719                    ++s1;
720                    break;
721                }
722
723                /* reached end of level buffer, pop one level */
724                do {
725                    --level1;
726                    start1=stack1[level1].start;
727                } while(start1==NULL);
728                s1=stack1[level1].s;
729                limit1=stack1[level1].limit;
730            }
731        }
732
733        if(c2<0) {
734            /* get next code unit from string 2, post-increment */
735            for(;;) {
736                if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
737                    if(level2==0) {
738                        c2=-1;
739                        break;
740                    }
741                } else {
742                    ++s2;
743                    break;
744                }
745
746                /* reached end of level buffer, pop one level */
747                do {
748                    --level2;
749                    start2=stack2[level2].start;
750                } while(start2==NULL);
751                s2=stack2[level2].s;
752                limit2=stack2[level2].limit;
753            }
754        }
755
756        /*
757         * compare c1 and c2
758         * either variable c1, c2 is -1 only if the corresponding string is finished
759         */
760        if(c1==c2) {
761            if(c1<0) {
762                return 0;   /* c1==c2==-1 indicating end of strings */
763            }
764            c1=c2=-1;       /* make us fetch new code units */
765            continue;
766        } else if(c1<0) {
767            return -1;      /* string 1 ends before string 2 */
768        } else if(c2<0) {
769            return 1;       /* string 2 ends before string 1 */
770        }
771        /* c1!=c2 && c1>=0 && c2>=0 */
772
773        /* get complete code points for c1, c2 for lookups if either is a surrogate */
774        cp1=c1;
775        if(U_IS_SURROGATE(c1)) {
776            UChar c;
777
778            if(U_IS_SURROGATE_LEAD(c1)) {
779                if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
780                    /* advance ++s1; only below if cp1 decomposes/case-folds */
781                    cp1=U16_GET_SUPPLEMENTARY(c1, c);
782                }
783            } else /* isTrail(c1) */ {
784                if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
785                    cp1=U16_GET_SUPPLEMENTARY(c, c1);
786                }
787            }
788        }
789
790        cp2=c2;
791        if(U_IS_SURROGATE(c2)) {
792            UChar c;
793
794            if(U_IS_SURROGATE_LEAD(c2)) {
795                if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
796                    /* advance ++s2; only below if cp2 decomposes/case-folds */
797                    cp2=U16_GET_SUPPLEMENTARY(c2, c);
798                }
799            } else /* isTrail(c2) */ {
800                if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
801                    cp2=U16_GET_SUPPLEMENTARY(c, c2);
802                }
803            }
804        }
805
806        /*
807         * go down one level for each string
808         * continue with the main loop as soon as there is a real change
809         */
810
811        if( level1==0 &&
812            (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
813        ) {
814            /* cp1 case-folds to the code point "length" or to p[length] */
815            if(U_IS_SURROGATE(c1)) {
816                if(U_IS_SURROGATE_LEAD(c1)) {
817                    /* advance beyond source surrogate pair if it case-folds */
818                    ++s1;
819                } else /* isTrail(c1) */ {
820                    /*
821                     * we got a supplementary code point when hitting its trail surrogate,
822                     * therefore the lead surrogate must have been the same as in the other string;
823                     * compare this decomposition with the lead surrogate in the other string
824                     * remember that this simulates bulk text replacement:
825                     * the decomposition would replace the entire code point
826                     */
827                    --s2;
828                    c2=*(s2-1);
829                }
830            }
831
832            /* push current level pointers */
833            stack1[0].start=start1;
834            stack1[0].s=s1;
835            stack1[0].limit=limit1;
836            ++level1;
837
838            /* copy the folding result to fold1[] */
839            if(length<=UCASE_MAX_STRING_LENGTH) {
840                u_memcpy(fold1, p, length);
841            } else {
842                int32_t i=0;
843                U16_APPEND_UNSAFE(fold1, i, length);
844                length=i;
845            }
846
847            /* set next level pointers to case folding */
848            start1=s1=fold1;
849            limit1=fold1+length;
850
851            /* get ready to read from decomposition, continue with loop */
852            c1=-1;
853            continue;
854        }
855
856        if( level2==0 &&
857            (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
858        ) {
859            /* cp2 case-folds to the code point "length" or to p[length] */
860            if(U_IS_SURROGATE(c2)) {
861                if(U_IS_SURROGATE_LEAD(c2)) {
862                    /* advance beyond source surrogate pair if it case-folds */
863                    ++s2;
864                } else /* isTrail(c2) */ {
865                    /*
866                     * we got a supplementary code point when hitting its trail surrogate,
867                     * therefore the lead surrogate must have been the same as in the other string;
868                     * compare this decomposition with the lead surrogate in the other string
869                     * remember that this simulates bulk text replacement:
870                     * the decomposition would replace the entire code point
871                     */
872                    --s1;
873                    c1=*(s1-1);
874                }
875            }
876
877            /* push current level pointers */
878            stack2[0].start=start2;
879            stack2[0].s=s2;
880            stack2[0].limit=limit2;
881            ++level2;
882
883            /* copy the folding result to fold2[] */
884            if(length<=UCASE_MAX_STRING_LENGTH) {
885                u_memcpy(fold2, p, length);
886            } else {
887                int32_t i=0;
888                U16_APPEND_UNSAFE(fold2, i, length);
889                length=i;
890            }
891
892            /* set next level pointers to case folding */
893            start2=s2=fold2;
894            limit2=fold2+length;
895
896            /* get ready to read from decomposition, continue with loop */
897            c2=-1;
898            continue;
899        }
900
901        /*
902         * no decomposition/case folding, max level for both sides:
903         * return difference result
904         *
905         * code point order comparison must not just return cp1-cp2
906         * because when single surrogates are present then the surrogate pairs
907         * that formed cp1 and cp2 may be from different string indexes
908         *
909         * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
910         * c1=d800 cp1=10001 c2=dc00 cp2=10000
911         * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
912         *
913         * therefore, use same fix-up as in ustring.c/uprv_strCompare()
914         * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
915         * so we have slightly different pointer/start/limit comparisons here
916         */
917
918        if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
919            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
920            if(
921                (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
922                (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
923            ) {
924                /* part of a surrogate pair, leave >=d800 */
925            } else {
926                /* BMP code point - may be surrogate code point - make <d800 */
927                c1-=0x2800;
928            }
929
930            if(
931                (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
932                (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
933            ) {
934                /* part of a surrogate pair, leave >=d800 */
935            } else {
936                /* BMP code point - may be surrogate code point - make <d800 */
937                c2-=0x2800;
938            }
939        }
940
941        return c1-c2;
942    }
943}
944
945/* public API functions */
946
947U_CAPI int32_t U_EXPORT2
948u_strCaseCompare(const UChar *s1, int32_t length1,
949                 const UChar *s2, int32_t length2,
950                 uint32_t options,
951                 UErrorCode *pErrorCode) {
952    /* argument checking */
953    if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
954        return 0;
955    }
956    if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
957        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
958        return 0;
959    }
960    return u_strcmpFold(s1, length1, s2, length2,
961                        options|U_COMPARE_IGNORE_CASE,
962                        pErrorCode);
963}
964
965U_CAPI int32_t U_EXPORT2
966u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
967    UErrorCode errorCode=U_ZERO_ERROR;
968    return u_strcmpFold(s1, -1, s2, -1,
969                        options|U_COMPARE_IGNORE_CASE,
970                        &errorCode);
971}
972
973U_CAPI int32_t U_EXPORT2
974u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
975    UErrorCode errorCode=U_ZERO_ERROR;
976    return u_strcmpFold(s1, length, s2, length,
977                        options|U_COMPARE_IGNORE_CASE,
978                        &errorCode);
979}
980
981U_CAPI int32_t U_EXPORT2
982u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
983    UErrorCode errorCode=U_ZERO_ERROR;
984    return u_strcmpFold(s1, n, s2, n,
985                        options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),
986                        &errorCode);
987}
988