ustrcase.c revision b13da9df870a61b11249bf741347908dbea0edd8
1b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project/*
2b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*******************************************************************************
3b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*
4b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*   Copyright (C) 2001-2007, International Business Machines
5b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*   Corporation and others.  All Rights Reserved.
6b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*
7b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*******************************************************************************
8b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*   file name:  ustrcase.c
9b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*   encoding:   US-ASCII
10b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*   tab size:   8 (not used)
11b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*   indentation:4
12b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*
13b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*   created on: 2002feb20
14b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*   created by: Markus W. Scherer
15b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*
16b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*   Implementation file for string casing C API functions.
17b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*   Uses functions from uchar.c for basic functionality that requires access
18b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*   to the Unicode Character Database (uprops.dat).
19b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*/
20b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project
21b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unicode/utypes.h"
22b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unicode/uloc.h"
23b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unicode/ustring.h"
24b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unicode/ucasemap.h"
25b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unicode/ubrk.h"
26b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "cmemory.h"
27b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "ucase.h"
28b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unormimp.h"
29b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "ustr_imp.h"
30b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project
31b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project/* string casing ------------------------------------------------------------ */
32b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project
33b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
34b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Projectstatic U_INLINE int32_t
35b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source ProjectappendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
36b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project             int32_t result, const UChar *s) {
37b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    UChar32 c;
38b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    int32_t length;
39b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project
40b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    /* decode the result */
41b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    if(result<0) {
42b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        /* (not) original code point */
43b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        c=~result;
44b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        length=-1;
45b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    } else if(result<=UCASE_MAX_STRING_LENGTH) {
46b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        c=U_SENTINEL;
47b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        length=result;
48d07d5a72938fd52415368c2320fc29575ae9a0c3Elliott Hughes    } else {
49b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        c=result;
50b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        length=-1;
51b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    }
52d07d5a72938fd52415368c2320fc29575ae9a0c3Elliott Hughes
53b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    if(destIndex<destCapacity) {
54b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        /* append the result */
55b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        if(length<0) {
56b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            /* code point */
57b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            UBool isError=FALSE;
58b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            U16_APPEND(dest, destIndex, destCapacity, c, isError);
59b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            if(isError) {
60b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project                /* overflow, nothing written */
61b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project                destIndex+=U16_LENGTH(c);
62b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            }
63b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        } else {
64b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            /* string */
65b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            if((destIndex+length)<=destCapacity) {
66b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project                while(length>0) {
67b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project                    dest[destIndex++]=*s++;
68b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project                    --length;
69b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project                }
70b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            } else {
71b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project                /* overflow */
72b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project                destIndex+=length;
73b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            }
74b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        }
75b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    } else {
76b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        /* preflight */
77b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        if(length<0) {
78b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            destIndex+=U16_LENGTH(c);
79b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        } else {
80b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            destIndex+=length;
81b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        }
82b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    }
83b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    return destIndex;
84b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project}
85b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project
86b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Projectstatic UChar32 U_CALLCONV
87b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Projectutf16_caseContextIterator(void *context, int8_t dir) {
88b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    UCaseContext *csc=(UCaseContext *)context;
89b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    UChar32 c;
90b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project
91b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    if(dir<0) {
92b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        /* reset for backward iteration */
93b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        csc->index=csc->cpStart;
94b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        csc->dir=dir;
95b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    } else if(dir>0) {
96b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        /* reset for forward iteration */
97b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        csc->index=csc->cpLimit;
98b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        csc->dir=dir;
99b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    } else {
100b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        /* continue current iteration direction */
101b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        dir=csc->dir;
102b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    }
103b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project
104b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    if(dir<0) {
105b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        if(csc->start<csc->index) {
106b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);
107b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            return c;
108b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        }
109b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    } else {
110b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        if(csc->index<csc->limit) {
111b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);
112b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project            return c;
113b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project        }
114b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    }
115b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project    return U_SENTINEL;
116b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project}
117
118/*
119 * Case-maps [srcStart..srcLimit[ but takes
120 * context [0..srcLength[ into account.
121 */
122static int32_t
123_caseMap(const UCaseMap *csm, UCaseMapFull *map,
124         UChar *dest, int32_t destCapacity,
125         const UChar *src, UCaseContext *csc,
126         int32_t srcStart, int32_t srcLimit,
127         UErrorCode *pErrorCode) {
128    const UChar *s;
129    UChar32 c, c2;
130    int32_t srcIndex, destIndex;
131    int32_t locCache;
132
133    locCache=csm->locCache;
134
135    /* case mapping loop */
136    srcIndex=srcStart;
137    destIndex=0;
138    while(srcIndex<srcLimit) {
139        csc->cpStart=srcIndex;
140        U16_NEXT(src, srcIndex, srcLimit, c);
141        csc->cpLimit=srcIndex;
142        c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache);
143        if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
144            /* fast path version of appendResult() for BMP results */
145            dest[destIndex++]=(UChar)c2;
146        } else {
147            destIndex=appendResult(dest, destIndex, destCapacity, c, s);
148        }
149    }
150
151    if(destIndex>destCapacity) {
152        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
153    }
154    return destIndex;
155}
156
157static void
158setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
159    /*
160     * We could call ucasemap_setLocale(), but here we really only care about
161     * the initial language subtag, we need not return the real string via
162     * ucasemap_getLocale(), and we don't care about only getting "x" from
163     * "x-some-thing" etc.
164     *
165     * We ignore locales with a longer-than-3 initial subtag.
166     *
167     * We also do not fill in the locCache because it is rarely used,
168     * and not worth setting unless we reuse it for many case mapping operations.
169     * (That's why UCaseMap was created.)
170     */
171    int i;
172    char c;
173
174    /* the internal functions require locale!=NULL */
175    if(locale==NULL) {
176        locale=uloc_getDefault();
177    }
178    for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) {
179        csm->locale[i]=c;
180    }
181    if(i<=3) {
182        csm->locale[i]=0;  /* Up to 3 non-separator characters. */
183    } else {
184        csm->locale[0]=0;  /* Longer-than-3 initial subtag: Ignore. */
185    }
186}
187
188/*
189 * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
190 * Do this fast because it is called with every function call.
191 */
192static U_INLINE void
193setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
194    if(csm->csp==NULL) {
195        csm->csp=ucase_getSingleton(pErrorCode);
196        if(U_FAILURE(*pErrorCode)) {
197            return;
198        }
199    }
200    if(locale!=NULL && locale[0]==0) {
201        csm->locale[0]=0;
202    } else {
203        setTempCaseMapLocale(csm, locale, pErrorCode);
204    }
205}
206
207#if !UCONFIG_NO_BREAK_ITERATION
208
209/*
210 * Internal titlecasing function.
211 */
212static int32_t
213_toTitle(UCaseMap *csm,
214         UChar *dest, int32_t destCapacity,
215         const UChar *src, UCaseContext *csc,
216         int32_t srcLength,
217         UErrorCode *pErrorCode) {
218    const UChar *s;
219    UChar32 c;
220    int32_t prev, titleStart, titleLimit, index, destIndex, length;
221    UBool isFirstIndex;
222
223    if(csm->iter!=NULL) {
224        ubrk_setText(csm->iter, src, srcLength, pErrorCode);
225    } else {
226        csm->iter=ubrk_open(UBRK_WORD, csm->locale,
227                            src, srcLength,
228                            pErrorCode);
229    }
230    if(U_FAILURE(*pErrorCode)) {
231        return 0;
232    }
233
234    /* set up local variables */
235    destIndex=0;
236    prev=0;
237    isFirstIndex=TRUE;
238
239    /* titlecasing loop */
240    while(prev<srcLength) {
241        /* find next index where to titlecase */
242        if(isFirstIndex) {
243            isFirstIndex=FALSE;
244            index=ubrk_first(csm->iter);
245        } else {
246            index=ubrk_next(csm->iter);
247        }
248        if(index==UBRK_DONE || index>srcLength) {
249            index=srcLength;
250        }
251
252        /*
253         * Unicode 4 & 5 section 3.13 Default Case Operations:
254         *
255         * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
256         * #29, "Text Boundaries." Between each pair of word boundaries, find the first
257         * cased character F. If F exists, map F to default_title(F); then map each
258         * subsequent character C to default_lower(C).
259         *
260         * In this implementation, segment [prev..index[ into 3 parts:
261         * a) uncased characters (copy as-is) [prev..titleStart[
262         * b) first case letter (titlecase)         [titleStart..titleLimit[
263         * c) subsequent characters (lowercase)                 [titleLimit..index[
264         */
265        if(prev<index) {
266            /* find and copy uncased characters [prev..titleStart[ */
267            titleStart=titleLimit=prev;
268            U16_NEXT(src, titleLimit, index, c);
269            if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
270                /* Adjust the titlecasing index (titleStart) to the next cased character. */
271                for(;;) {
272                    titleStart=titleLimit;
273                    if(titleLimit==index) {
274                        /*
275                         * only uncased characters in [prev..index[
276                         * stop with titleStart==titleLimit==index
277                         */
278                        break;
279                    }
280                    U16_NEXT(src, titleLimit, index, c);
281                    if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
282                        break; /* cased letter at [titleStart..titleLimit[ */
283                    }
284                }
285                length=titleStart-prev;
286                if(length>0) {
287                    if((destIndex+length)<=destCapacity) {
288                        uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);
289                    }
290                    destIndex+=length;
291                }
292            }
293
294            if(titleStart<titleLimit) {
295                /* titlecase c which is from [titleStart..titleLimit[ */
296                csc->cpStart=titleStart;
297                csc->cpLimit=titleLimit;
298                c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
299                destIndex=appendResult(dest, destIndex, destCapacity, c, s);
300
301                /* lowercase [titleLimit..index[ */
302                if(titleLimit<index) {
303                    if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
304                        /* Normal operation: Lowercase the rest of the word. */
305                        destIndex+=
306                            _caseMap(
307                                csm, ucase_toFullLower,
308                                dest+destIndex, destCapacity-destIndex,
309                                src, csc,
310                                titleLimit, index,
311                                pErrorCode);
312                    } else {
313                        /* Optionally just copy the rest of the word unchanged. */
314                        length=index-titleLimit;
315                        if((destIndex+length)<=destCapacity) {
316                            uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR);
317                        }
318                        destIndex+=length;
319                    }
320                }
321            }
322        }
323
324        prev=index;
325    }
326
327    if(destIndex>destCapacity) {
328        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
329    }
330    return destIndex;
331}
332
333#endif
334
335/* functions available in the common library (for unistr_case.cpp) */
336
337U_CFUNC int32_t
338ustr_toLower(const UCaseProps *csp,
339             UChar *dest, int32_t destCapacity,
340             const UChar *src, int32_t srcLength,
341             const char *locale,
342             UErrorCode *pErrorCode) {
343    UCaseMap csm={ NULL };
344    UCaseContext csc={ NULL };
345
346    csm.csp=csp;
347    setTempCaseMap(&csm, locale, pErrorCode);
348    csc.p=(void *)src;
349    csc.limit=srcLength;
350
351    return _caseMap(&csm, ucase_toFullLower,
352                    dest, destCapacity,
353                    src, &csc, 0, srcLength,
354                    pErrorCode);
355}
356
357U_CFUNC int32_t
358ustr_toUpper(const UCaseProps *csp,
359             UChar *dest, int32_t destCapacity,
360             const UChar *src, int32_t srcLength,
361             const char *locale,
362             UErrorCode *pErrorCode) {
363    UCaseMap csm={ NULL };
364    UCaseContext csc={ NULL };
365
366    csm.csp=csp;
367    setTempCaseMap(&csm, locale, pErrorCode);
368    csc.p=(void *)src;
369    csc.limit=srcLength;
370
371    return _caseMap(&csm, ucase_toFullUpper,
372                    dest, destCapacity,
373                    src, &csc, 0, srcLength,
374                    pErrorCode);
375}
376
377#if !UCONFIG_NO_BREAK_ITERATION
378
379U_CFUNC int32_t
380ustr_toTitle(const UCaseProps *csp,
381             UChar *dest, int32_t destCapacity,
382             const UChar *src, int32_t srcLength,
383             UBreakIterator *titleIter,
384             const char *locale, uint32_t options,
385             UErrorCode *pErrorCode) {
386    UCaseMap csm={ NULL };
387    UCaseContext csc={ NULL };
388    int32_t length;
389
390    csm.csp=csp;
391    csm.iter=titleIter;
392    csm.options=options;
393    setTempCaseMap(&csm, locale, pErrorCode);
394    csc.p=(void *)src;
395    csc.limit=srcLength;
396
397    length=_toTitle(&csm,
398                    dest, destCapacity,
399                    src, &csc, srcLength,
400                    pErrorCode);
401    if(titleIter==NULL && csm.iter!=NULL) {
402        ubrk_close(csm.iter);
403    }
404    return length;
405}
406
407#endif
408
409U_CFUNC int32_t
410ustr_foldCase(const UCaseProps *csp,
411              UChar *dest, int32_t destCapacity,
412              const UChar *src, int32_t srcLength,
413              uint32_t options,
414              UErrorCode *pErrorCode) {
415    int32_t srcIndex, destIndex;
416
417    const UChar *s;
418    UChar32 c, c2;
419
420    /* case mapping loop */
421    srcIndex=destIndex=0;
422    while(srcIndex<srcLength) {
423        U16_NEXT(src, srcIndex, srcLength, c);
424        c=ucase_toFullFolding(csp, c, &s, options);
425        if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
426            /* fast path version of appendResult() for BMP results */
427            dest[destIndex++]=(UChar)c2;
428        } else {
429            destIndex=appendResult(dest, destIndex, destCapacity, c, s);
430        }
431    }
432
433    if(destIndex>destCapacity) {
434        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
435    }
436    return destIndex;
437}
438
439/*
440 * Implement argument checking and buffer handling
441 * for string case mapping as a common function.
442 */
443
444/* common internal function for public API functions */
445
446static int32_t
447caseMap(const UCaseMap *csm,
448        UChar *dest, int32_t destCapacity,
449        const UChar *src, int32_t srcLength,
450        int32_t toWhichCase,
451        UErrorCode *pErrorCode) {
452    UChar buffer[300];
453    UChar *temp;
454
455    int32_t destLength;
456
457    /* check argument values */
458    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
459        return 0;
460    }
461    if( destCapacity<0 ||
462        (dest==NULL && destCapacity>0) ||
463        src==NULL ||
464        srcLength<-1
465    ) {
466        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
467        return 0;
468    }
469
470    /* get the string length */
471    if(srcLength==-1) {
472        srcLength=u_strlen(src);
473    }
474
475    /* check for overlapping source and destination */
476    if( dest!=NULL &&
477        ((src>=dest && src<(dest+destCapacity)) ||
478         (dest>=src && dest<(src+srcLength)))
479    ) {
480        /* overlap: provide a temporary destination buffer and later copy the result */
481        if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) {
482            /* the stack buffer is large enough */
483            temp=buffer;
484        } else {
485            /* allocate a buffer */
486            temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
487            if(temp==NULL) {
488                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
489                return 0;
490            }
491        }
492    } else {
493        temp=dest;
494    }
495
496    destLength=0;
497
498    if(toWhichCase==FOLD_CASE) {
499        destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength,
500                                 csm->options, pErrorCode);
501    } else {
502        UCaseContext csc={ NULL };
503
504        csc.p=(void *)src;
505        csc.limit=srcLength;
506
507        if(toWhichCase==TO_LOWER) {
508            destLength=_caseMap(csm, ucase_toFullLower,
509                                temp, destCapacity,
510                                src, &csc,
511                                0, srcLength,
512                                pErrorCode);
513        } else if(toWhichCase==TO_UPPER) {
514            destLength=_caseMap(csm, ucase_toFullUpper,
515                                temp, destCapacity,
516                                src, &csc,
517                                0, srcLength,
518                                pErrorCode);
519        } else /* if(toWhichCase==TO_TITLE) */ {
520#if UCONFIG_NO_BREAK_ITERATION
521            *pErrorCode=U_UNSUPPORTED_ERROR;
522#else
523            /* UCaseMap is actually non-const in toTitle() APIs. */
524            destLength=_toTitle((UCaseMap *)csm, temp, destCapacity,
525                                src, &csc, srcLength,
526                                pErrorCode);
527#endif
528        }
529    }
530    if(temp!=dest) {
531        /* copy the result string to the destination buffer */
532        if(destLength>0) {
533            int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity;
534            if(copyLength>0) {
535                uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR);
536            }
537        }
538        if(temp!=buffer) {
539            uprv_free(temp);
540        }
541    }
542
543    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
544}
545
546/* public API functions */
547
548U_CAPI int32_t U_EXPORT2
549u_strToLower(UChar *dest, int32_t destCapacity,
550             const UChar *src, int32_t srcLength,
551             const char *locale,
552             UErrorCode *pErrorCode) {
553    UCaseMap csm={ NULL };
554    setTempCaseMap(&csm, locale, pErrorCode);
555    return caseMap(&csm,
556                   dest, destCapacity,
557                   src, srcLength,
558                   TO_LOWER, pErrorCode);
559}
560
561U_CAPI int32_t U_EXPORT2
562u_strToUpper(UChar *dest, int32_t destCapacity,
563             const UChar *src, int32_t srcLength,
564             const char *locale,
565             UErrorCode *pErrorCode) {
566    UCaseMap csm={ NULL };
567    setTempCaseMap(&csm, locale, pErrorCode);
568    return caseMap(&csm,
569                   dest, destCapacity,
570                   src, srcLength,
571                   TO_UPPER, pErrorCode);
572}
573
574#if !UCONFIG_NO_BREAK_ITERATION
575
576U_CAPI int32_t U_EXPORT2
577u_strToTitle(UChar *dest, int32_t destCapacity,
578             const UChar *src, int32_t srcLength,
579             UBreakIterator *titleIter,
580             const char *locale,
581             UErrorCode *pErrorCode) {
582    UCaseMap csm={ NULL };
583    int32_t length;
584
585    csm.iter=titleIter;
586    setTempCaseMap(&csm, locale, pErrorCode);
587    length=caseMap(&csm,
588                   dest, destCapacity,
589                   src, srcLength,
590                   TO_TITLE, pErrorCode);
591    if(titleIter==NULL && csm.iter!=NULL) {
592        ubrk_close(csm.iter);
593    }
594    return length;
595}
596
597U_CAPI int32_t U_EXPORT2
598ucasemap_toTitle(UCaseMap *csm,
599                 UChar *dest, int32_t destCapacity,
600                 const UChar *src, int32_t srcLength,
601                 UErrorCode *pErrorCode) {
602    return caseMap(csm,
603                   dest, destCapacity,
604                   src, srcLength,
605                   TO_TITLE, pErrorCode);
606}
607
608#endif
609
610U_CAPI int32_t U_EXPORT2
611u_strFoldCase(UChar *dest, int32_t destCapacity,
612              const UChar *src, int32_t srcLength,
613              uint32_t options,
614              UErrorCode *pErrorCode) {
615    UCaseMap csm={ NULL };
616    csm.csp=ucase_getSingleton(pErrorCode);
617    csm.options=options;
618    return caseMap(&csm,
619                   dest, destCapacity,
620                   src, srcLength,
621                   FOLD_CASE, pErrorCode);
622}
623
624/* case-insensitive string comparisons -------------------------------------- */
625
626/*
627 * This function is a copy of unorm_cmpEquivFold() minus the parts for
628 * canonical equivalence.
629 * Keep the functions in sync, and see there for how this works.
630 * The duplication is for modularization:
631 * It makes caseless (but not canonical caseless) matches independent of
632 * the normalization code.
633 */
634
635/* stack element for previous-level source/decomposition pointers */
636struct CmpEquivLevel {
637    const UChar *start, *s, *limit;
638};
639typedef struct CmpEquivLevel CmpEquivLevel;
640
641/* internal function */
642U_CFUNC int32_t
643u_strcmpFold(const UChar *s1, int32_t length1,
644             const UChar *s2, int32_t length2,
645             uint32_t options,
646             UErrorCode *pErrorCode) {
647    const UCaseProps *csp;
648
649    /* current-level start/limit - s1/s2 as current */
650    const UChar *start1, *start2, *limit1, *limit2;
651
652    /* case folding variables */
653    const UChar *p;
654    int32_t length;
655
656    /* stacks of previous-level start/current/limit */
657    CmpEquivLevel stack1[2], stack2[2];
658
659    /* case folding buffers, only use current-level start/limit */
660    UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
661
662    /* track which is the current level per string */
663    int32_t level1, level2;
664
665    /* current code units, and code points for lookups */
666    UChar32 c1, c2, cp1, cp2;
667
668    /* no argument error checking because this itself is not an API */
669
670    /*
671     * assume that at least the option U_COMPARE_IGNORE_CASE is set
672     * otherwise this function would have to behave exactly as uprv_strCompare()
673     */
674    csp=ucase_getSingleton(pErrorCode);
675    if(U_FAILURE(*pErrorCode)) {
676        return 0;
677    }
678
679    /* initialize */
680    start1=s1;
681    if(length1==-1) {
682        limit1=NULL;
683    } else {
684        limit1=s1+length1;
685    }
686
687    start2=s2;
688    if(length2==-1) {
689        limit2=NULL;
690    } else {
691        limit2=s2+length2;
692    }
693
694    level1=level2=0;
695    c1=c2=-1;
696
697    /* comparison loop */
698    for(;;) {
699        /*
700         * here a code unit value of -1 means "get another code unit"
701         * below it will mean "this source is finished"
702         */
703
704        if(c1<0) {
705            /* get next code unit from string 1, post-increment */
706            for(;;) {
707                if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
708                    if(level1==0) {
709                        c1=-1;
710                        break;
711                    }
712                } else {
713                    ++s1;
714                    break;
715                }
716
717                /* reached end of level buffer, pop one level */
718                do {
719                    --level1;
720                    start1=stack1[level1].start;
721                } while(start1==NULL);
722                s1=stack1[level1].s;
723                limit1=stack1[level1].limit;
724            }
725        }
726
727        if(c2<0) {
728            /* get next code unit from string 2, post-increment */
729            for(;;) {
730                if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
731                    if(level2==0) {
732                        c2=-1;
733                        break;
734                    }
735                } else {
736                    ++s2;
737                    break;
738                }
739
740                /* reached end of level buffer, pop one level */
741                do {
742                    --level2;
743                    start2=stack2[level2].start;
744                } while(start2==NULL);
745                s2=stack2[level2].s;
746                limit2=stack2[level2].limit;
747            }
748        }
749
750        /*
751         * compare c1 and c2
752         * either variable c1, c2 is -1 only if the corresponding string is finished
753         */
754        if(c1==c2) {
755            if(c1<0) {
756                return 0;   /* c1==c2==-1 indicating end of strings */
757            }
758            c1=c2=-1;       /* make us fetch new code units */
759            continue;
760        } else if(c1<0) {
761            return -1;      /* string 1 ends before string 2 */
762        } else if(c2<0) {
763            return 1;       /* string 2 ends before string 1 */
764        }
765        /* c1!=c2 && c1>=0 && c2>=0 */
766
767        /* get complete code points for c1, c2 for lookups if either is a surrogate */
768        cp1=c1;
769        if(U_IS_SURROGATE(c1)) {
770            UChar c;
771
772            if(U_IS_SURROGATE_LEAD(c1)) {
773                if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
774                    /* advance ++s1; only below if cp1 decomposes/case-folds */
775                    cp1=U16_GET_SUPPLEMENTARY(c1, c);
776                }
777            } else /* isTrail(c1) */ {
778                if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
779                    cp1=U16_GET_SUPPLEMENTARY(c, c1);
780                }
781            }
782        }
783
784        cp2=c2;
785        if(U_IS_SURROGATE(c2)) {
786            UChar c;
787
788            if(U_IS_SURROGATE_LEAD(c2)) {
789                if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
790                    /* advance ++s2; only below if cp2 decomposes/case-folds */
791                    cp2=U16_GET_SUPPLEMENTARY(c2, c);
792                }
793            } else /* isTrail(c2) */ {
794                if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
795                    cp2=U16_GET_SUPPLEMENTARY(c, c2);
796                }
797            }
798        }
799
800        /*
801         * go down one level for each string
802         * continue with the main loop as soon as there is a real change
803         */
804
805        if( level1==0 &&
806            (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
807        ) {
808            /* cp1 case-folds to the code point "length" or to p[length] */
809            if(U_IS_SURROGATE(c1)) {
810                if(U_IS_SURROGATE_LEAD(c1)) {
811                    /* advance beyond source surrogate pair if it case-folds */
812                    ++s1;
813                } else /* isTrail(c1) */ {
814                    /*
815                     * we got a supplementary code point when hitting its trail surrogate,
816                     * therefore the lead surrogate must have been the same as in the other string;
817                     * compare this decomposition with the lead surrogate in the other string
818                     * remember that this simulates bulk text replacement:
819                     * the decomposition would replace the entire code point
820                     */
821                    --s2;
822                    c2=*(s2-1);
823                }
824            }
825
826            /* push current level pointers */
827            stack1[0].start=start1;
828            stack1[0].s=s1;
829            stack1[0].limit=limit1;
830            ++level1;
831
832            /* copy the folding result to fold1[] */
833            if(length<=UCASE_MAX_STRING_LENGTH) {
834                u_memcpy(fold1, p, length);
835            } else {
836                int32_t i=0;
837                U16_APPEND_UNSAFE(fold1, i, length);
838                length=i;
839            }
840
841            /* set next level pointers to case folding */
842            start1=s1=fold1;
843            limit1=fold1+length;
844
845            /* get ready to read from decomposition, continue with loop */
846            c1=-1;
847            continue;
848        }
849
850        if( level2==0 &&
851            (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
852        ) {
853            /* cp2 case-folds to the code point "length" or to p[length] */
854            if(U_IS_SURROGATE(c2)) {
855                if(U_IS_SURROGATE_LEAD(c2)) {
856                    /* advance beyond source surrogate pair if it case-folds */
857                    ++s2;
858                } else /* isTrail(c2) */ {
859                    /*
860                     * we got a supplementary code point when hitting its trail surrogate,
861                     * therefore the lead surrogate must have been the same as in the other string;
862                     * compare this decomposition with the lead surrogate in the other string
863                     * remember that this simulates bulk text replacement:
864                     * the decomposition would replace the entire code point
865                     */
866                    --s1;
867                    c1=*(s1-1);
868                }
869            }
870
871            /* push current level pointers */
872            stack2[0].start=start2;
873            stack2[0].s=s2;
874            stack2[0].limit=limit2;
875            ++level2;
876
877            /* copy the folding result to fold2[] */
878            if(length<=UCASE_MAX_STRING_LENGTH) {
879                u_memcpy(fold2, p, length);
880            } else {
881                int32_t i=0;
882                U16_APPEND_UNSAFE(fold2, i, length);
883                length=i;
884            }
885
886            /* set next level pointers to case folding */
887            start2=s2=fold2;
888            limit2=fold2+length;
889
890            /* get ready to read from decomposition, continue with loop */
891            c2=-1;
892            continue;
893        }
894
895        /*
896         * no decomposition/case folding, max level for both sides:
897         * return difference result
898         *
899         * code point order comparison must not just return cp1-cp2
900         * because when single surrogates are present then the surrogate pairs
901         * that formed cp1 and cp2 may be from different string indexes
902         *
903         * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
904         * c1=d800 cp1=10001 c2=dc00 cp2=10000
905         * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
906         *
907         * therefore, use same fix-up as in ustring.c/uprv_strCompare()
908         * except: uprv_strCompare() fetches c=*s while this function fetches c=*s++
909         * so we have slightly different pointer/start/limit comparisons here
910         */
911
912        if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
913            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
914            if(
915                (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
916                (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
917            ) {
918                /* part of a surrogate pair, leave >=d800 */
919            } else {
920                /* BMP code point - may be surrogate code point - make <d800 */
921                c1-=0x2800;
922            }
923
924            if(
925                (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
926                (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
927            ) {
928                /* part of a surrogate pair, leave >=d800 */
929            } else {
930                /* BMP code point - may be surrogate code point - make <d800 */
931                c2-=0x2800;
932            }
933        }
934
935        return c1-c2;
936    }
937}
938
939/* public API functions */
940
941U_CAPI int32_t U_EXPORT2
942u_strCaseCompare(const UChar *s1, int32_t length1,
943                 const UChar *s2, int32_t length2,
944                 uint32_t options,
945                 UErrorCode *pErrorCode) {
946    /* argument checking */
947    if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
948        return 0;
949    }
950    if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
951        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
952        return 0;
953    }
954    return u_strcmpFold(s1, length1, s2, length2,
955                        options|U_COMPARE_IGNORE_CASE,
956                        pErrorCode);
957}
958
959U_CAPI int32_t U_EXPORT2
960u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
961    UErrorCode errorCode=U_ZERO_ERROR;
962    return u_strcmpFold(s1, -1, s2, -1,
963                        options|U_COMPARE_IGNORE_CASE,
964                        &errorCode);
965}
966
967U_CAPI int32_t U_EXPORT2
968u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
969    UErrorCode errorCode=U_ZERO_ERROR;
970    return u_strcmpFold(s1, length, s2, length,
971                        options|U_COMPARE_IGNORE_CASE,
972                        &errorCode);
973}
974
975U_CAPI int32_t U_EXPORT2
976u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
977    UErrorCode errorCode=U_ZERO_ERROR;
978    return u_strcmpFold(s1, n, s2, n,
979                        options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),
980                        &errorCode);
981}
982