1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6*   Copyright (C) 2002-2016, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9*******************************************************************************
10*   file name:  strcase.cpp
11*   encoding:   UTF-8
12*   tab size:   8 (not used)
13*   indentation:4
14*
15*   created on: 2002mar12
16*   created by: Markus W. Scherer
17*
18*   Test file for string casing C++ API functions.
19*/
20
21#include "unicode/std_string.h"
22#include "unicode/brkiter.h"
23#include "unicode/casemap.h"
24#include "unicode/edits.h"
25#include "unicode/uchar.h"
26#include "unicode/ures.h"
27#include "unicode/uloc.h"
28#include "unicode/locid.h"
29#include "unicode/ubrk.h"
30#include "unicode/unistr.h"
31#include "unicode/ucasemap.h"
32#include "unicode/ustring.h"
33#include "ucase.h"
34#include "ustrtest.h"
35#include "unicode/tstdtmod.h"
36#include "cmemory.h"
37#include "testutil.h"
38
39class StringCaseTest: public IntlTest {
40public:
41    StringCaseTest();
42    virtual ~StringCaseTest();
43
44    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
45
46    void TestCaseConversion();
47
48    void TestCasingImpl(const UnicodeString &input,
49                        const UnicodeString &output,
50                        int32_t whichCase,
51                        void *iter, const char *localeID, uint32_t options);
52    void TestCasing();
53    void TestTitleOptions();
54    void TestFullCaseFoldingIterator();
55    void TestGreekUpper();
56    void TestLongUpper();
57    void TestMalformedUTF8();
58    void TestBufferOverflow();
59    void TestEdits();
60    void TestCopyMoveEdits();
61    void TestEditsFindFwdBwd();
62    void TestMergeEdits();
63    void TestCaseMapWithEdits();
64    void TestCaseMapUTF8WithEdits();
65    void TestCaseMapToString();
66    void TestCaseMapUTF8ToString();
67    void TestLongUnicodeString();
68    void TestBug13127();
69    void TestInPlaceTitle();
70
71private:
72    void assertGreekUpper(const char16_t *s, const char16_t *expected);
73
74    Locale GREEK_LOCALE_;
75};
76
77StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
78
79StringCaseTest::~StringCaseTest() {}
80
81extern IntlTest *createStringCaseTest() {
82    return new StringCaseTest();
83}
84
// Test framework entry point for this suite.
// Logs the suite name when exec is true, then lists every test method in the
// TESTCASE_AUTO table. The last parameter is unnamed because it is not used.
// NOTE(review): TESTCASE_AUTO presumably numbers the tests in listing order
// (macro defined outside this file) - keep the order stable; confirm in intltest.h.
void
StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
    if(exec) {
        logln("TestSuite StringCaseTest: ");
    }
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(TestCaseConversion);
    // The next two tests are compiled in only when break iteration, file I/O
    // and legacy conversion are all available.
#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
    TESTCASE_AUTO(TestCasing);
    TESTCASE_AUTO(TestTitleOptions);
#endif
    TESTCASE_AUTO(TestFullCaseFoldingIterator);
    TESTCASE_AUTO(TestGreekUpper);
    TESTCASE_AUTO(TestLongUpper);
    TESTCASE_AUTO(TestMalformedUTF8);
    TESTCASE_AUTO(TestBufferOverflow);
    TESTCASE_AUTO(TestEdits);
    TESTCASE_AUTO(TestCopyMoveEdits);
    TESTCASE_AUTO(TestEditsFindFwdBwd);
    TESTCASE_AUTO(TestMergeEdits);
    TESTCASE_AUTO(TestCaseMapWithEdits);
    TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
    TESTCASE_AUTO(TestCaseMapToString);
    TESTCASE_AUTO(TestCaseMapUTF8ToString);
    TESTCASE_AUTO(TestLongUnicodeString);
    // These two tests are compiled in only when break iteration is available.
#if !UCONFIG_NO_BREAK_ITERATION
    TESTCASE_AUTO(TestBug13127);
    TESTCASE_AUTO(TestInPlaceTitle);
#endif
    TESTCASE_AUTO_END;
}
116
117void
118StringCaseTest::TestCaseConversion()
119{
120    static const UChar uppercaseGreek[] =
121        { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
122        0x39f, 0x3a3, 0 };
123        // "IESUS CHRISTOS"
124
125    static const UChar lowercaseGreek[] =
126        { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
127        0x3bf, 0x3c2, 0 };
128        // "iesus christos"
129
130    static const UChar lowercaseTurkish[] =
131        { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
132        0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
133
134    static const UChar uppercaseTurkish[] =
135        { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
136        0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
137
138    UnicodeString expectedResult;
139    UnicodeString   test3;
140
141    test3 += (UChar32)0x0130;
142    test3 += "STANBUL, NOT CONSTANTINOPLE!";
143
144    UnicodeString   test4(test3);
145    test4.toLower(Locale(""));
146    expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
147    if (test4 != expectedResult)
148        errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
149
150    test4 = test3;
151    test4.toLower(Locale("tr", "TR"));
152    expectedResult = lowercaseTurkish;
153    if (test4 != expectedResult)
154        errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
155
156    test3 = "topkap";
157    test3 += (UChar32)0x0131;
158    test3 += " palace, istanbul";
159    test4 = test3;
160
161    test4.toUpper(Locale(""));
162    expectedResult = "TOPKAPI PALACE, ISTANBUL";
163    if (test4 != expectedResult)
164        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
165
166    test4 = test3;
167    test4.toUpper(Locale("tr", "TR"));
168    expectedResult = uppercaseTurkish;
169    if (test4 != expectedResult)
170        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
171
172    test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
173
174    test3.toUpper(Locale("de", "DE"));
175    expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
176    if (test3 != expectedResult)
177        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");
178
179    test4.replace(0, test4.length(), uppercaseGreek);
180
181    test4.toLower(Locale("el", "GR"));
182    expectedResult = lowercaseGreek;
183    if (test4 != expectedResult)
184        errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
185
186    test4.replace(0, test4.length(), lowercaseGreek);
187
188    test4.toUpper();
189    expectedResult = uppercaseGreek;
190    if (test4 != expectedResult)
191        errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
192
193    // more string case mapping tests with the new implementation
194    {
195        static const UChar
196
197        beforeLower[]= { 0x61, 0x42, 0x49,  0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
198        lowerRoot[]=   { 0x61, 0x62, 0x69,  0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
199        lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
200
201        beforeUpper[]= { 0x61, 0x42, 0x69,  0x3c2, 0xdf,       0x3c3, 0x2f, 0xfb03,           0xfb03,           0xfb03,           0xd93f, 0xdfff },
202        upperRoot[]=   { 0x41, 0x42, 0x49,  0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
203        upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
204
205        beforeMiniUpper[]=  { 0xdf, 0x61 },
206        miniUpper[]=        { 0x53, 0x53, 0x41 };
207
208        UnicodeString s;
209
210        /* lowercase with root locale */
211        s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
212        s.toLower("");
213        if( s.length()!=UPRV_LENGTHOF(lowerRoot) ||
214            s!=UnicodeString(FALSE, lowerRoot, s.length())
215        ) {
216            errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\"");
217        }
218
219        /* lowercase with turkish locale */
220        s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
221        s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
222        if( s.length()!=UPRV_LENGTHOF(lowerTurkish) ||
223            s!=UnicodeString(FALSE, lowerTurkish, s.length())
224        ) {
225            errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\"");
226        }
227
228        /* uppercase with root locale */
229        s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
230        s.setCharAt(0, beforeUpper[0]).toUpper(Locale(""));
231        if( s.length()!=UPRV_LENGTHOF(upperRoot) ||
232            s!=UnicodeString(FALSE, upperRoot, s.length())
233        ) {
234            errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\"");
235        }
236
237        /* uppercase with turkish locale */
238        s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
239        s.toUpper(Locale("tr"));
240        if( s.length()!=UPRV_LENGTHOF(upperTurkish) ||
241            s!=UnicodeString(FALSE, upperTurkish, s.length())
242        ) {
243            errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\"");
244        }
245
246        /* uppercase a short string with root locale */
247        s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper));
248        s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
249        if( s.length()!=UPRV_LENGTHOF(miniUpper) ||
250            s!=UnicodeString(FALSE, miniUpper, s.length())
251        ) {
252            errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\"");
253        }
254    }
255
256    // test some supplementary characters (>= Unicode 3.1)
257    {
258        UnicodeString t;
259
260        UnicodeString
261            deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
262            deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
263            deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
264        (t=deseretInput).toLower();
265        if(t!=deseretLower) {
266            errln("error lowercasing Deseret (plane 1) characters");
267        }
268        (t=deseretInput).toUpper();
269        if(t!=deseretUpper) {
270            errln("error uppercasing Deseret (plane 1) characters");
271        }
272    }
273
274    // test some more cases that looked like problems
275    {
276        UnicodeString t;
277
278        UnicodeString
279            ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
280            ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
281            ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
282        (t=ljInput).toLower("en");
283        if(t!=ljLower) {
284            errln("error lowercasing LJ characters");
285        }
286        (t=ljInput).toUpper("en");
287        if(t!=ljUpper) {
288            errln("error uppercasing LJ characters");
289        }
290    }
291
292#if !UCONFIG_NO_NORMALIZATION
293    // some context-sensitive casing depends on normalization data being present
294
295    // Unicode 3.1.1 SpecialCasing tests
296    {
297        UnicodeString t;
298
299        // sigmas preceded and/or followed by cased letters
300        UnicodeString
301            sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
302            sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
303            sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
304
305        (t=sigmas).toLower();
306        if(t!=sigmasLower) {
307            errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
308        }
309
310        (t=sigmas).toUpper(Locale(""));
311        if(t!=sigmasUpper) {
312            errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
313        }
314
315        // turkish & azerbaijani dotless i & dotted I
316        // remove dot above if there was a capital I before and there are no more accents above
317        UnicodeString
318            dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
319            dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
320            dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
321
322        (t=dots).toLower("tr");
323        if(t!=dotsTurkish) {
324            errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
325        }
326
327        (t=dots).toLower("de");
328        if(t!=dotsDefault) {
329            errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
330        }
331    }
332
333    // more Unicode 3.1.1 tests
334    {
335        UnicodeString t;
336
337        // lithuanian dot above in uppercasing
338        UnicodeString
339            dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
340            dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
341            dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
342
343        (t=dots).toUpper("lt");
344        if(t!=dotsLithuanian) {
345            errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
346        }
347
348        (t=dots).toUpper("de");
349        if(t!=dotsDefault) {
350            errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
351        }
352
353        // lithuanian adds dot above to i in lowercasing if there are more above accents
354        UnicodeString
355            i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
356            iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
357            iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
358
359        (t=i).toLower("lt");
360        if(t!=iLithuanian) {
361            errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
362        }
363
364        (t=i).toLower("de");
365        if(t!=iDefault) {
366            errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
367        }
368    }
369
370#endif
371
372    // test case folding
373    {
374        UnicodeString
375            s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
376            f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
377            g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
378            t;
379
380        (t=s).foldCase();
381        if(f!=t) {
382            errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
383        }
384
385        // alternate handling for dotted I/dotless i (U+0130, U+0131)
386        (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
387        if(g!=t) {
388            errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\"");
389        }
390    }
391}
392
393// data-driven case mapping tests ------------------------------------------ ***
394
// Selectors for the kind of case mapping under test.
// The values are used as indexes into dataNames[], so the order of the two
// lists must stay in sync. TEST_COUNT is the number of selectors.
enum {
    TEST_LOWER = 0,
    TEST_UPPER,
    TEST_TITLE,
    TEST_FOLD,
    TEST_COUNT
};

// names of TestData children in casing.txt, indexed by the TEST_* selectors;
// the extra last entry is an empty string
static const char *const dataNames[TEST_COUNT+1]={
    "lowercasing",  // TEST_LOWER
    "uppercasing",  // TEST_UPPER
    "titlecasing",  // TEST_TITLE
    "casefolding",  // TEST_FOLD
    ""
};
411
412void
413StringCaseTest::TestCasingImpl(const UnicodeString &input,
414                               const UnicodeString &output,
415                               int32_t whichCase,
416                               void *iter, const char *localeID, uint32_t options) {
417    // UnicodeString
418    UnicodeString result;
419    const char *name;
420    Locale locale(localeID);
421
422    result=input;
423    switch(whichCase) {
424    case TEST_LOWER:
425        name="toLower";
426        result.toLower(locale);
427        break;
428    case TEST_UPPER:
429        name="toUpper";
430        result.toUpper(locale);
431        break;
432#if !UCONFIG_NO_BREAK_ITERATION
433    case TEST_TITLE:
434        name="toTitle";
435        result.toTitle((BreakIterator *)iter, locale, options);
436        break;
437#endif
438    case TEST_FOLD:
439        name="foldCase";
440        result.foldCase(options);
441        break;
442    default:
443        name="";
444        break; // won't happen
445    }
446    if(result!=output) {
447        dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
448    }
449#if !UCONFIG_NO_BREAK_ITERATION
450    if(whichCase==TEST_TITLE && options==0) {
451        result=input;
452        result.toTitle((BreakIterator *)iter, locale);
453        if(result!=output) {
454            dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
455        }
456    }
457#endif
458
459    // UTF-8
460    char utf8In[100], utf8Out[100];
461    int32_t utf8InLength, utf8OutLength, resultLength;
462    UChar *buffer;
463
464    IcuTestErrorCode errorCode(*this, "TestCasingImpl");
465    LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode));
466#if !UCONFIG_NO_BREAK_ITERATION
467    if(iter!=NULL) {
468        // Clone the break iterator so that the UCaseMap can safely adopt it.
469        UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode);
470        ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode);
471    }
472#endif
473
474    u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode);
475    switch(whichCase) {
476    case TEST_LOWER:
477        name="ucasemap_utf8ToLower";
478        utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
479                    utf8Out, (int32_t)sizeof(utf8Out),
480                    utf8In, utf8InLength, errorCode);
481        break;
482    case TEST_UPPER:
483        name="ucasemap_utf8ToUpper";
484        utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(),
485                    utf8Out, (int32_t)sizeof(utf8Out),
486                    utf8In, utf8InLength, errorCode);
487        break;
488#if !UCONFIG_NO_BREAK_ITERATION
489    case TEST_TITLE:
490        name="ucasemap_utf8ToTitle";
491        utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(),
492                    utf8Out, (int32_t)sizeof(utf8Out),
493                    utf8In, utf8InLength, errorCode);
494        break;
495#endif
496    case TEST_FOLD:
497        name="ucasemap_utf8FoldCase";
498        utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(),
499                    utf8Out, (int32_t)sizeof(utf8Out),
500                    utf8In, utf8InLength, errorCode);
501        break;
502    default:
503        name="";
504        utf8OutLength=0;
505        break; // won't happen
506    }
507    buffer=result.getBuffer(utf8OutLength);
508    u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode);
509    result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0);
510
511    if(errorCode.isFailure()) {
512        errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
513        errorCode.reset();
514    } else if(result!=output) {
515        errln("error: %s() got a wrong result for a test case from casing.res", name);
516        errln("expected \"" + output + "\" got \"" + result + "\"" );
517    }
518}
519
520void
521StringCaseTest::TestCasing() {
522    UErrorCode status = U_ZERO_ERROR;
523#if !UCONFIG_NO_BREAK_ITERATION
524    LocalUBreakIteratorPointer iter;
525#endif
526    char cLocaleID[100];
527    UnicodeString locale, input, output, optionsString, result;
528    uint32_t options;
529    int32_t whichCase, type;
530    LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status));
531    if(U_SUCCESS(status)) {
532        for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) {
533#if UCONFIG_NO_BREAK_ITERATION
534            if(whichCase==TEST_TITLE) {
535                continue;
536            }
537#endif
538            LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status));
539            if(U_FAILURE(status)) {
540                errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status));
541                break;
542            }
543            const DataMap *myCase = NULL;
544            while(casingTest->nextCase(myCase, status)) {
545                input = myCase->getString("Input", status);
546                output = myCase->getString("Output", status);
547
548                if(whichCase!=TEST_FOLD) {
549                    locale = myCase->getString("Locale", status);
550                }
551                locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
552
553#if !UCONFIG_NO_BREAK_ITERATION
554                if(whichCase==TEST_TITLE) {
555                    type = myCase->getInt("Type", status);
556                    if(type>=0) {
557                        iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status));
558                    } else if(type==-2) {
559                        // Open a trivial break iterator that only delivers { 0, length }
560                        // or even just { 0 } as boundaries.
561                        static const UChar rules[] = { 0x2e, 0x2a, 0x3b };  // ".*;"
562                        UParseError parseError;
563                        iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status));
564                    }
565                }
566#endif
567                options = 0;
568                if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) {
569                    optionsString = myCase->getString("Options", status);
570                    if(optionsString.indexOf((UChar)0x54)>=0) {  // T
571                        options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I;
572                    }
573                    if(optionsString.indexOf((UChar)0x4c)>=0) {  // L
574                        options|=U_TITLECASE_NO_LOWERCASE;
575                    }
576                    if(optionsString.indexOf((UChar)0x41)>=0) {  // A
577                        options|=U_TITLECASE_NO_BREAK_ADJUSTMENT;
578                    }
579                }
580
581                if(U_FAILURE(status)) {
582                    dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase],  u_errorName(status));
583                    status = U_ZERO_ERROR;
584                } else {
585#if UCONFIG_NO_BREAK_ITERATION
586                    LocalPointer<UMemory> iter;
587#endif
588                    TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options);
589                }
590
591#if !UCONFIG_NO_BREAK_ITERATION
592                iter.adoptInstead(NULL);
593#endif
594            }
595        }
596    }
597
598#if !UCONFIG_NO_BREAK_ITERATION
599    // more tests for API coverage
600    status=U_ZERO_ERROR;
601    input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
602    (result=input).toTitle(NULL);
603    if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
604        dataerrln("UnicodeString::toTitle(NULL) failed.");
605    }
606#endif
607}
608
609void
610StringCaseTest::TestTitleOptions() {
611    // New options in ICU 60.
612    TestCasingImpl(u"ʻcAt! ʻeTc.", u"ʻCat! ʻetc.", TEST_TITLE,
613                   nullptr, "", U_TITLECASE_WHOLE_STRING);
614    TestCasingImpl(u"a ʻCaT. A ʻdOg! ʻeTc.", u"A ʻCaT. A ʻdOg! ʻETc.", TEST_TITLE,
615                   nullptr, "", U_TITLECASE_SENTENCES|U_TITLECASE_NO_LOWERCASE);
616    TestCasingImpl(u"49eRs", u"49ers", TEST_TITLE,
617                   nullptr, "", U_TITLECASE_WHOLE_STRING);
618    TestCasingImpl(u"«丰(aBc)»", u"«丰(abc)»", TEST_TITLE,
619                   nullptr, "", U_TITLECASE_WHOLE_STRING);
620    TestCasingImpl(u"49eRs", u"49Ers", TEST_TITLE,
621                   nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
622    TestCasingImpl(u"«丰(aBc)»", u"«丰(Abc)»", TEST_TITLE,
623                   nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
624    TestCasingImpl(u" john. Smith", u" John. Smith", TEST_TITLE,
625                   nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_LOWERCASE);
626    TestCasingImpl(u" john. Smith", u" john. smith", TEST_TITLE,
627                   nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_BREAK_ADJUSTMENT);
628    TestCasingImpl(u"«ijs»", u"«IJs»", TEST_TITLE,
629                   nullptr, "nl-BE", U_TITLECASE_WHOLE_STRING);
630    TestCasingImpl(u"«ijs»", u"«İjs»", TEST_TITLE,
631                   nullptr, "tr-DE", U_TITLECASE_WHOLE_STRING);
632
633#if !UCONFIG_NO_BREAK_ITERATION
634    // Test conflicting settings.
635    // If & when we add more options, then the ORed combinations may become
636    // indistinguishable from valid values.
637    IcuTestErrorCode errorCode(*this, "TestTitleOptions");
638    CaseMap::toTitle("", U_TITLECASE_NO_BREAK_ADJUSTMENT|U_TITLECASE_ADJUST_TO_CASED, nullptr,
639                     u"", 0, nullptr, 0, nullptr, errorCode);
640    if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
641        errln("CaseMap::toTitle(multiple adjustment options) -> %s not illegal argument",
642              errorCode.errorName());
643    }
644    errorCode.reset();
645    CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING|U_TITLECASE_SENTENCES, nullptr,
646                     u"", 0, nullptr, 0, nullptr, errorCode);
647    if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
648        errln("CaseMap::toTitle(multiple iterator options) -> %s not illegal argument",
649              errorCode.errorName());
650    }
651    errorCode.reset();
652    LocalPointer<BreakIterator> iter(
653        BreakIterator::createCharacterInstance(Locale::getRoot(), errorCode));
654    CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING, iter.getAlias(),
655                     u"", 0, nullptr, 0, nullptr, errorCode);
656    if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
657        errln("CaseMap::toTitle(iterator option + iterator) -> %s not illegal argument",
658              errorCode.errorName());
659    }
660    errorCode.reset();
661#endif
662}
663
664void
665StringCaseTest::TestFullCaseFoldingIterator() {
666    UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
667    UnicodeString ss=UNICODE_STRING_SIMPLE("ss");
668    FullCaseFoldingIterator iter;
669    int32_t count=0;
670    int32_t countSpecific=0;
671    UChar32 c;
672    UnicodeString full;
673    while((c=iter.next(full))>=0) {
674        ++count;
675        // Check that the full Case_Folding has more than 1 code point.
676        if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) {
677            errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c);
678            continue;
679        }
680        // Check that full == Case_Folding(c).
681        UnicodeString cf(c);
682        cf.foldCase();
683        if(full!=cf) {
684            errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c);
685            continue;
686        }
687        // Spot-check a couple of specific cases.
688        if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) {
689            ++countSpecific;
690        }
691    }
692    if(countSpecific!=3) {
693        errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
694    }
695    if(count<70) {
696        errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count);
697    }
698}
699
700void
701StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) {
702    UnicodeString s16(s);
703    UnicodeString expected16(expected);
704    UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")";
705    UnicodeString result16(s16);
706    result16.toUpper(GREEK_LOCALE_);
707    assertEquals(msg, expected16, result16);
708
709    msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap=";
710    int32_t length = expected16.length();
711    int32_t capacities[] = {
712        // Keep in sync with the UTF-8 capacities near the bottom of this function.
713        0, length / 2, length - 1, length, length + 1
714    };
715    for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
716        int32_t cap = capacities[i];
717        UChar *dest16 = result16.getBuffer(expected16.length() + 1);
718        u_memset(dest16, 0x55AA, result16.getCapacity());
719        UErrorCode errorCode = U_ZERO_ERROR;
720        length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode);
721        assertEquals(msg + cap, expected16.length(), length);
722        UErrorCode expectedErrorCode;
723        if (cap < expected16.length()) {
724            expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
725        } else if (cap == expected16.length()) {
726            expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
727        } else {
728            expectedErrorCode = U_ZERO_ERROR;
729            assertEquals(msg + cap + " NUL", 0, dest16[length]);
730        }
731        assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
732        result16.releaseBuffer(length);
733        if (cap >= expected16.length()) {
734            assertEquals(msg + cap, expected16, result16);
735        }
736    }
737
738    UErrorCode errorCode = U_ZERO_ERROR;
739    LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode));
740    assertSuccess("ucasemap_open", errorCode);
741    std::string s8;
742    s16.toUTF8String(s8);
743    msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")";
744    char dest8[1000];
745    length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8),
746                                  s8.data(), s8.length(), &errorCode);
747    assertSuccess("ucasemap_utf8ToUpper", errorCode);
748    StringPiece result8(dest8, length);
749    UnicodeString result16From8 = UnicodeString::fromUTF8(result8);
750    assertEquals(msg, expected16, result16From8);
751
752    msg += " cap=";
753    capacities[1] = length / 2;
754    capacities[2] = length - 1;
755    capacities[3] = length;
756    capacities[4] = length + 1;
757    char dest8b[1000];
758    int32_t expected8Length = length;  // Assuming the previous call worked.
759    for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
760        int32_t cap = capacities[i];
761        memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b));
762        UErrorCode errorCode = U_ZERO_ERROR;
763        length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap,
764                                      s8.data(), s8.length(), &errorCode);
765        assertEquals(msg + cap, expected8Length, length);
766        UErrorCode expectedErrorCode;
767        if (cap < expected8Length) {
768            expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
769        } else if (cap == expected8Length) {
770            expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
771        } else {
772            expectedErrorCode = U_ZERO_ERROR;
773            // Casts to int32_t to avoid matching UBool.
774            assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]);
775        }
776        assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
777        if (cap >= expected8Length) {
778            assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length));
779        }
780    }
781}
782
783void
784StringCaseTest::TestGreekUpper() {
785    // http://bugs.icu-project.org/trac/ticket/5456
786    assertGreekUpper(u"άδικος, κείμενο, ίριδα", u"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
787    // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
788    // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
789    assertGreekUpper(u"Πατάτα", u"ΠΑΤΑΤΑ");
790    assertGreekUpper(u"Αέρας, Μυστήριο, Ωραίο", u"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
791    assertGreekUpper(u"Μαΐου, Πόρος, Ρύθμιση", u"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
792    assertGreekUpper(u"ΰ, Τηρώ, Μάιος", u"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
793    assertGreekUpper(u"άυλος", u"ΑΫΛΟΣ");
794    assertGreekUpper(u"ΑΫΛΟΣ", u"ΑΫΛΟΣ");
795    assertGreekUpper(u"Άκλιτα ρήματα ή άκλιτες μετοχές", u"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
796    // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
797    assertGreekUpper(u"Επειδή η αναγνώριση της αξιοπρέπειας", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
798    assertGreekUpper(u"νομικού ή διεθνούς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
799    // http://unicode.org/udhr/d/udhr_ell_polytonic.html
800    assertGreekUpper(u"Ἐπειδὴ ἡ ἀναγνώριση", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
801    assertGreekUpper(u"νομικοῦ ἢ διεθνοῦς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
802    // From Google bug report
803    assertGreekUpper(u"Νέο, Δημιουργία", u"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
804    // http://crbug.com/234797
805    assertGreekUpper(u"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
806    assertGreekUpper(u"Μαΐου, τρόλεϊ", u"ΜΑΪΟΥ, ΤΡΟΛΕΪ");
807    assertGreekUpper(u"Το ένα ή το άλλο.", u"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
808    // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
809    assertGreekUpper(u"ρωμέικα", u"ΡΩΜΕΪΚΑ");
810    assertGreekUpper(u"ή.", u"Ή.");
811}
812
813void
814StringCaseTest::TestLongUpper() {
815    if (quick) {
816        logln("not exhaustive mode: skipping this test");
817        return;
818    }
819    // Ticket #12663, crash with an extremely long string where
820    // U+0390 maps to 0399 0308 0301 so that the result is three times as long
821    // and overflows an int32_t.
822    int32_t length = 0x40000004;  // more than 1G UChars
823    UnicodeString s(length, (UChar32)0x390, length);
824    UnicodeString result;
825    UChar *dest = result.getBuffer(length + 1);
826    if (s.isBogus() || dest == NULL) {
827        logln("Out of memory, unable to run this test on this machine.");
828        return;
829    }
830    IcuTestErrorCode errorCode(*this, "TestLongUpper");
831    int32_t destLength = u_strToUpper(dest, result.getCapacity(),
832                                      s.getBuffer(), s.length(), "", errorCode);
833    result.releaseBuffer(destLength);
834    if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) {
835        errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)",
836              errorCode.errorName(), (long)destLength);
837    }
838}
839
840void StringCaseTest::TestMalformedUTF8() {
841    // ticket #12639
842    IcuTestErrorCode errorCode(*this, "TestMalformedUTF8");
843    LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode));
844    if (errorCode.isFailure()) {
845        errln("ucasemap_open(English) failed - %s", errorCode.errorName());
846        return;
847    }
848    char src[1] = { (char)0x85 };  // malformed UTF-8
849    char dest[3] = { 0, 0, 0 };
850    int32_t destLength;
851#if !UCONFIG_NO_BREAK_ITERATION
852    destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode);
853    if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
854        errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
855              errorCode.errorName(), (int)destLength, dest[0]);
856    }
857#endif
858
859    errorCode.reset();
860    dest[0] = 0;
861    destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode);
862    if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
863        errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
864              errorCode.errorName(), (int)destLength, dest[0]);
865    }
866
867    errorCode.reset();
868    dest[0] = 0;
869    destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode);
870    if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
871        errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
872              errorCode.errorName(), (int)destLength, dest[0]);
873    }
874
875    errorCode.reset();
876    dest[0] = 0;
877    destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode);
878    if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
879        errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
880              errorCode.errorName(), (int)destLength, dest[0]);
881    }
882}
883
884void StringCaseTest::TestBufferOverflow() {
885    // Ticket #12849, incorrect result from Title Case preflight operation,
886    // when buffer overflow error is expected.
887    IcuTestErrorCode errorCode(*this, "TestBufferOverflow");
888    LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode));
889    if (errorCode.isFailure()) {
890        errln("ucasemap_open(English) failed - %s", errorCode.errorName());
891        return;
892    }
893
894    UnicodeString data("hello world");
895    int32_t result;
896#if !UCONFIG_NO_BREAK_ITERATION
897    result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode);
898    if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) {
899        errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
900              "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
901              __FILE__, __LINE__, data.length(), errorCode.errorName(), result);
902    }
903#endif
904    errorCode.reset();
905
906    std::string data_utf8;
907    data.toUTF8String(data_utf8);
908#if !UCONFIG_NO_BREAK_ITERATION
909    result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), data_utf8.length(), errorCode);
910    if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) {
911        errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
912              "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
913              __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result);
914    }
915#endif
916    errorCode.reset();
917}
918
919void StringCaseTest::TestEdits() {
920    IcuTestErrorCode errorCode(*this, "TestEdits");
921    Edits edits;
922    assertFalse("new Edits hasChanges", edits.hasChanges());
923    assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
924    assertEquals("new Edits", 0, edits.lengthDelta());
925    edits.addUnchanged(1);  // multiple unchanged ranges are combined
926    edits.addUnchanged(10000);  // too long, and they are split
927    edits.addReplace(0, 0);
928    edits.addUnchanged(2);
929    assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
930    assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
931    assertEquals("unchanged 10003", 0, edits.lengthDelta());
932    edits.addReplace(2, 1);  // multiple short equal-lengths edits are compressed
933    edits.addUnchanged(0);
934    edits.addReplace(2, 1);
935    edits.addReplace(2, 1);
936    edits.addReplace(0, 10);
937    edits.addReplace(100, 0);
938    edits.addReplace(3000, 4000);  // variable-length encoding
939    edits.addReplace(100000, 100000);
940    assertTrue("some edits hasChanges", edits.hasChanges());
941    assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
942    assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta());
943    UErrorCode outErrorCode = U_ZERO_ERROR;
944    assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
945
946    static const EditChange coarseExpectedChanges[] = {
947            { FALSE, 10003, 10003 },
948            { TRUE, 103106, 104013 }
949    };
950    TestUtility::checkEditsIter(*this, u"coarse",
951            edits.getCoarseIterator(), edits.getCoarseIterator(),
952            coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
953    TestUtility::checkEditsIter(*this, u"coarse changes",
954            edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
955            coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
956
957    static const EditChange fineExpectedChanges[] = {
958            { FALSE, 10003, 10003 },
959            { TRUE, 2, 1 },
960            { TRUE, 2, 1 },
961            { TRUE, 2, 1 },
962            { TRUE, 0, 10 },
963            { TRUE, 100, 0 },
964            { TRUE, 3000, 4000 },
965            { TRUE, 100000, 100000 }
966    };
967    TestUtility::checkEditsIter(*this, u"fine",
968            edits.getFineIterator(), edits.getFineIterator(),
969            fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
970    TestUtility::checkEditsIter(*this, u"fine changes",
971            edits.getFineChangesIterator(), edits.getFineChangesIterator(),
972            fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
973
974    edits.reset();
975    assertFalse("reset hasChanges", edits.hasChanges());
976    assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
977    assertEquals("reset", 0, edits.lengthDelta());
978    Edits::Iterator ei = edits.getCoarseChangesIterator();
979    assertFalse("reset then iterator", ei.next(errorCode));
980}
981
// Checks Edits copy construction and copy assignment, and (on platforms where
// std::move is usable) move construction and move assignment, plus the
// default-constructed Edits::Iterator.
void StringCaseTest::TestCopyMoveEdits() {
    IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits");
    // Exceed the stack array capacity.
    Edits a;
    for (int32_t i = 0; i < 250; ++i) {
        // Each replacement grows the text by exactly one unit,
        // so 250 of them give a length delta of 250.
        a.addReplace(i % 10, (i % 10) + 1);
    }
    assertEquals("a: many edits, length delta", 250, a.lengthDelta());

    // copy
    Edits b(a);
    assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta());
    assertEquals("a remains: many edits, length delta", 250, a.lengthDelta());
    TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode);

    // assign
    // c starts with unrelated contents that the assignment must replace.
    Edits c;
    c.addUnchanged(99);
    c.addReplace(88, 77);
    c = b;
    assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta());
    assertEquals("b remains: many edits, length delta", 250, b.lengthDelta());
    TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode);

    // std::move trouble on these platforms.
    // See https://ssl.icu-project.org/trac/ticket/13393
#if !UPRV_INCOMPLETE_CPP11_SUPPORT && !(U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390)
    // move constructor empties object with heap array
    Edits d(std::move(a));
    assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta());
    assertFalse("a moved away: no more hasChanges", a.hasChanges());
    // b is still an untouched copy of the original contents of a.
    TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode);
    Edits empty;
    TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode);

    // move assignment empties object with heap array
    // e starts with unrelated contents that the move assignment must replace.
    Edits e;
    e.addReplace(0, 1000);
    e = std::move(b);
    assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta());
    assertFalse("b moved away: no more hasChanges", b.hasChanges());
    // c was copy-assigned from b earlier, so it serves as the reference here.
    TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode);
    TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode);

    // Edits::Iterator default constructor.
    Edits::Iterator iter;
    assertFalse("Edits::Iterator().next()", iter.next(errorCode));
    assertSuccess("Edits::Iterator().next()", errorCode);
    // Re-assigning the iterator makes it usable; the first recorded change
    // is addReplace(0, 1), hence a new length of 1.
    iter = e.getFineChangesIterator();
    assertTrue("iter.next()", iter.next(errorCode));
    assertSuccess("iter.next()", errorCode);
    assertTrue("iter.hasChange()", iter.hasChange());
    assertEquals("iter.newLength()", 1, iter.newLength());
#endif
}
1037
1038void StringCaseTest::TestEditsFindFwdBwd() {
1039    IcuTestErrorCode errorCode(*this, "TestEditsFindFwdBwd");
1040    // Some users need index mappings to be efficient when they are out of order.
1041    // The most interesting failure case for this test is it taking a very long time.
1042    Edits e;
1043    constexpr int32_t N = 200000;
1044    for (int32_t i = 0; i < N; ++i) {
1045        e.addUnchanged(1);
1046        e.addReplace(3, 1);
1047    }
1048    Edits::Iterator iter = e.getFineIterator();
1049    for (int32_t i = 0; i <= N; i += 2) {
1050        assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1051        assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1052    }
1053    for (int32_t i = N; i >= 0; i -= 2) {
1054        assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1055        assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1056    }
1057}
1058
// Exercises Edits::mergeAndAppend().
// Three Edits objects are built side by side: ab and bc are the two inputs,
// and expected_ac is the hand-computed result of merging them. After the
// scenarios are recorded, ac.mergeAndAppend(ab, bc) is compared with
// expected_ac. Further calls check appending to a non-empty result, merging
// empty inputs, and the error reported for inputs whose lengths do not match.
// The "ahead by N" comments track how far one input has run ahead of the
// other in the intermediate string.
void StringCaseTest::TestMergeEdits() {
    // For debugging, set -v to see matching edits up to a failure.
    IcuTestErrorCode errorCode(*this, "TestMergeEdits");
    Edits ab, bc, ac, expected_ac;

    // Simple: Two parallel non-changes.
    ab.addUnchanged(2);
    bc.addUnchanged(2);
    expected_ac.addUnchanged(2);

    // Simple: Two aligned changes.
    ab.addReplace(3, 2);
    bc.addReplace(2, 1);
    expected_ac.addReplace(3, 1);

    // Unequal non-changes.
    ab.addUnchanged(5);
    bc.addUnchanged(3);
    expected_ac.addUnchanged(3);
    // ab ahead by 2

    // Overlapping changes accumulate until they share a boundary.
    ab.addReplace(4, 3);
    bc.addReplace(3, 2);
    ab.addReplace(4, 3);
    bc.addReplace(3, 2);
    ab.addReplace(4, 3);
    bc.addReplace(3, 2);
    bc.addUnchanged(4);
    expected_ac.addReplace(14, 8);
    // bc ahead by 2

    // Balance out intermediate-string lengths.
    ab.addUnchanged(2);
    expected_ac.addUnchanged(2);

    // Insert something and delete it: Should disappear.
    ab.addReplace(0, 5);
    ab.addReplace(0, 2);
    bc.addReplace(7, 0);

    // Parallel change to make a new boundary.
    ab.addReplace(1, 2);
    bc.addReplace(2, 3);
    expected_ac.addReplace(1, 3);

    // Multiple ab deletions should remain separate at the boundary.
    ab.addReplace(1, 0);
    ab.addReplace(2, 0);
    ab.addReplace(3, 0);
    expected_ac.addReplace(1, 0);
    expected_ac.addReplace(2, 0);
    expected_ac.addReplace(3, 0);

    // Unequal non-changes can be split for another boundary.
    ab.addUnchanged(2);
    bc.addUnchanged(1);
    expected_ac.addUnchanged(1);
    // ab ahead by 1

    // Multiple bc insertions should create a boundary and remain separate.
    bc.addReplace(0, 4);
    bc.addReplace(0, 5);
    bc.addReplace(0, 6);
    expected_ac.addReplace(0, 4);
    expected_ac.addReplace(0, 5);
    expected_ac.addReplace(0, 6);
    // ab ahead by 1

    // Multiple ab deletions in the middle of a bc change are merged.
    bc.addReplace(2, 2);
    // bc ahead by 1
    ab.addReplace(1, 0);
    ab.addReplace(2, 0);
    ab.addReplace(3, 0);
    ab.addReplace(4, 1);
    expected_ac.addReplace(11, 2);

    // Multiple bc insertions in the middle of an ab change are merged.
    ab.addReplace(5, 6);
    bc.addReplace(3, 3);
    // ab ahead by 3
    bc.addReplace(0, 4);
    bc.addReplace(0, 5);
    bc.addReplace(0, 6);
    bc.addReplace(3, 7);
    expected_ac.addReplace(5, 25);

    // Delete around a deletion.
    ab.addReplace(4, 4);
    ab.addReplace(3, 0);
    ab.addUnchanged(2);
    bc.addReplace(2, 2);
    bc.addReplace(4, 0);
    expected_ac.addReplace(9, 2);

    // Insert into an insertion.
    ab.addReplace(0, 2);
    bc.addReplace(1, 1);
    bc.addReplace(0, 8);
    bc.addUnchanged(4);
    expected_ac.addReplace(0, 10);
    // bc ahead by 3

    // Balance out intermediate-string lengths.
    ab.addUnchanged(3);
    expected_ac.addUnchanged(3);

    // Deletions meet insertions.
    // Output order is arbitrary in principle, but we expect insertions first
    // and want to keep it that way.
    ab.addReplace(2, 0);
    ab.addReplace(4, 0);
    ab.addReplace(6, 0);
    bc.addReplace(0, 1);
    bc.addReplace(0, 3);
    bc.addReplace(0, 5);
    expected_ac.addReplace(0, 1);
    expected_ac.addReplace(0, 3);
    expected_ac.addReplace(0, 5);
    expected_ac.addReplace(2, 0);
    expected_ac.addReplace(4, 0);
    expected_ac.addReplace(6, 0);

    // End with a non-change, so that further edits are never reordered.
    ab.addUnchanged(1);
    bc.addUnchanged(1);
    expected_ac.addUnchanged(1);

    // Merge everything recorded so far and compare with the expectation.
    // Later steps build on this result, so stop at the first mismatch.
    ac.mergeAndAppend(ab, bc, errorCode);
    assertSuccess("ab+bc", errorCode);
    if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) {
        return;
    }

    // Append more Edits.
    Edits ab2, bc2;
    ab2.addUnchanged(5);
    bc2.addReplace(1, 2);
    bc2.addUnchanged(4);
    expected_ac.addReplace(1, 2);
    expected_ac.addUnchanged(4);
    ac.mergeAndAppend(ab2, bc2, errorCode);
    assertSuccess("ab2+bc2", errorCode);
    if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) {
        return;
    }

    // Append empty edits.
    // expected_ac is not modified here: merging nothing must leave ac as it was.
    Edits empty;
    ac.mergeAndAppend(empty, empty, errorCode);
    assertSuccess("empty+empty", errorCode);
    if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) {
        return;
    }

    // Error: Append more edits with mismatched intermediate-string lengths.
    // The mismatch is tried on either side; each expected error is cleared
    // with reset() after it has been checked.
    Edits mismatch;
    mismatch.addReplace(1, 1);
    ac.mergeAndAppend(ab2, mismatch, errorCode);
    assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
    errorCode.reset();
    ac.mergeAndAppend(mismatch, bc2, errorCode);
    assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
    errorCode.reset();
}
1225
1226void StringCaseTest::TestCaseMapWithEdits() {
1227    IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits");
1228    UChar dest[20];
1229    Edits edits;
1230
1231    int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1232                                      u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1233    assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
1234    static const EditChange lowerExpectedChanges[] = {
1235            { TRUE, 1, 1 },
1236            { FALSE, 4, 4 },
1237            { TRUE, 1, 1 },
1238            { FALSE, 2, 2 }
1239    };
1240    TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1241            edits.getFineIterator(), edits.getFineIterator(),
1242            lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
1243            TRUE, errorCode);
1244
1245    edits.reset();
1246    length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1247                              u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1248    assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1249    static const EditChange upperExpectedChanges[] = {
1250            { FALSE, 1, 1 },
1251            { TRUE, 1, 1 },
1252            { TRUE, 1, 1 },
1253            { TRUE, 1, 1 },
1254            { TRUE, 1, 1 },
1255            { TRUE, 1, 1 }
1256    };
1257    TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1258            edits.getFineIterator(), edits.getFineIterator(),
1259            upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1260            TRUE, errorCode);
1261
1262    edits.reset();
1263
1264#if !UCONFIG_NO_BREAK_ITERATION
1265    length = CaseMap::toTitle("nl",
1266                              U_OMIT_UNCHANGED_TEXT |
1267                              U_TITLECASE_NO_BREAK_ADJUSTMENT |
1268                              U_TITLECASE_NO_LOWERCASE,
1269                              nullptr, u"IjssEL IglOo", 12,
1270                              dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1271    assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
1272    static const EditChange titleExpectedChanges[] = {
1273            { FALSE, 1, 1 },
1274            { TRUE, 1, 1 },
1275            { FALSE, 10, 10 }
1276    };
1277    TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1278            edits.getFineIterator(), edits.getFineIterator(),
1279            titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1280            TRUE, errorCode);
1281#endif
1282
1283    // No explicit nor automatic edits.reset(). Edits should be appended.
1284    length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1285                           u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1286    assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
1287    static const EditChange foldExpectedChanges[] = {
1288#if !UCONFIG_NO_BREAK_ITERATION
1289            // From titlecasing.
1290            { FALSE, 1, 1 },
1291            { TRUE, 1, 1 },
1292            { FALSE, 10, 10 },
1293#endif
1294            // From case folding.
1295            { TRUE, 1, 1 },
1296            { TRUE, 1, 2 },
1297            { FALSE, 3, 3 },
1298            { TRUE, 1, 1 },
1299            { FALSE, 2, 2 }
1300    };
1301    TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, IßtanBul)",
1302            edits.getFineIterator(), edits.getFineIterator(),
1303            foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1304            TRUE, errorCode);
1305}
1306
1307void StringCaseTest::TestCaseMapUTF8WithEdits() {
1308    IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8WithEdits");
1309    char dest[50];
1310    Edits edits;
1311
1312    int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT,
1313                                          u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1314    assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
1315                 UnicodeString::fromUTF8(StringPiece(dest, length)));
1316    static const EditChange lowerExpectedChanges[] = {
1317            { TRUE, 1, 2 },
1318            { FALSE, 4, 4 },
1319            { TRUE, 1, 1 },
1320            { FALSE, 2, 2 }
1321    };
1322    TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1323            edits.getFineIterator(), edits.getFineIterator(),
1324            lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
1325            TRUE, errorCode);
1326
1327    edits.reset();
1328    length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT,
1329                                  u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1330    assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1331                 UnicodeString::fromUTF8(StringPiece(dest, length)));
1332    static const EditChange upperExpectedChanges[] = {
1333            { FALSE, 2, 2 },
1334            { TRUE, 2, 2 },
1335            { TRUE, 2, 2 },
1336            { TRUE, 2, 2 },
1337            { TRUE, 2, 2 },
1338            { TRUE, 2, 2 }
1339    };
1340    TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1341            edits.getFineIterator(), edits.getFineIterator(),
1342            upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1343            TRUE, errorCode);
1344
1345    edits.reset();
1346#if !UCONFIG_NO_BREAK_ITERATION
1347    length = CaseMap::utf8ToTitle("nl",
1348                                  U_OMIT_UNCHANGED_TEXT |
1349                                  U_TITLECASE_NO_BREAK_ADJUSTMENT |
1350                                  U_TITLECASE_NO_LOWERCASE,
1351                                  nullptr, u8"IjssEL IglOo", 12,
1352                                  dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1353    assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1354                 UnicodeString::fromUTF8(StringPiece(dest, length)));
1355    static const EditChange titleExpectedChanges[] = {
1356            { FALSE, 1, 1 },
1357            { TRUE, 1, 1 },
1358            { FALSE, 10, 10 }
1359    };
1360    TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1361            edits.getFineIterator(), edits.getFineIterator(),
1362            titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1363            TRUE, errorCode);
1364#endif
1365
1366    // No explicit nor automatic edits.reset(). Edits should be appended.
1367    length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET |
1368                                   U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1369                               u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1370    assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1371                 UnicodeString::fromUTF8(StringPiece(dest, length)));
1372    static const EditChange foldExpectedChanges[] = {
1373#if !UCONFIG_NO_BREAK_ITERATION
1374            // From titlecasing.
1375            { FALSE, 1, 1 },
1376            { TRUE, 1, 1 },
1377            { FALSE, 10, 10 },
1378#endif
1379            // From case folding.
1380            { TRUE, 1, 2 },
1381            { TRUE, 2, 2 },
1382            { FALSE, 3, 3 },
1383            { TRUE, 1, 1 },
1384            { FALSE, 2, 2 }
1385    };
1386    TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)",
1387            edits.getFineIterator(), edits.getFineIterator(),
1388            foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1389            TRUE, errorCode);
1390}
1391
1392void StringCaseTest::TestCaseMapToString() {
1393    // This test function name is parallel with one in UCharacterCaseTest.java.
1394    // It is a bit of a misnomer until we have CaseMap API that writes to
1395    // a UnicodeString, at which point we should change this code here.
1396    IcuTestErrorCode errorCode(*this, "TestCaseMapToString");
1397    UChar dest[20];
1398
1399    // Omit unchanged text.
1400    int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1401                                      u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1402    assertEquals(u"toLower(IstanBul)",
1403                 UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
1404    length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1405                              u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1406    assertEquals(u"toUpper(Πατάτα)",
1407                 UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1408#if !UCONFIG_NO_BREAK_ITERATION
1409    length = CaseMap::toTitle("nl",
1410                              U_OMIT_UNCHANGED_TEXT |
1411                              U_TITLECASE_NO_BREAK_ADJUSTMENT |
1412                              U_TITLECASE_NO_LOWERCASE,
1413                              nullptr, u"IjssEL IglOo", 12,
1414                              dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1415    assertEquals(u"toTitle(IjssEL IglOo)",
1416                 UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
1417#endif
1418    length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1419                           u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1420    assertEquals(u"foldCase(IßtanBul)",
1421                 UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
1422
1423    // Return the whole result string.
1424    length = CaseMap::toLower("tr", 0,
1425                              u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1426    assertEquals(u"toLower(IstanBul)",
1427                 UnicodeString(u"ıstanbul"), UnicodeString(TRUE, dest, length));
1428    length = CaseMap::toUpper("el", 0,
1429                              u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1430    assertEquals(u"toUpper(Πατάτα)",
1431                 UnicodeString(u"ΠΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1432#if !UCONFIG_NO_BREAK_ITERATION
1433    length = CaseMap::toTitle("nl",
1434                              U_TITLECASE_NO_BREAK_ADJUSTMENT |
1435                              U_TITLECASE_NO_LOWERCASE,
1436                              nullptr, u"IjssEL IglOo", 12,
1437                              dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1438    assertEquals(u"toTitle(IjssEL IglOo)",
1439                 UnicodeString(u"IJssEL IglOo"), UnicodeString(TRUE, dest, length));
1440#endif
1441    length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1442                           u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1443    assertEquals(u"foldCase(IßtanBul)",
1444                 UnicodeString(u"ısstanbul"), UnicodeString(TRUE, dest, length));
1445}
1446
1447void StringCaseTest::TestCaseMapUTF8ToString() {
1448    IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString");
1449    std::string dest;
1450    StringByteSink<std::string> sink(&dest);
1451
1452    // Omit unchanged text.
1453    CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, u8"IstanBul", sink, nullptr, errorCode);
1454    assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString::fromUTF8(dest));
1455    dest.clear();
1456    CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, u8"Πατάτα", sink, nullptr, errorCode);
1457    assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1458                 UnicodeString::fromUTF8(dest));
1459#if !UCONFIG_NO_BREAK_ITERATION
1460    dest.clear();
1461    CaseMap::utf8ToTitle(
1462        "nl", U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1463        nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1464    assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1465                 UnicodeString::fromUTF8(dest));
1466#endif
1467    dest.clear();
1468    CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1469                      u8"IßtanBul", sink, nullptr, errorCode);
1470    assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1471                 UnicodeString::fromUTF8(dest));
1472
1473    // Return the whole result string.
1474    dest.clear();
1475    CaseMap::utf8ToLower("tr", 0, u8"IstanBul", sink, nullptr, errorCode);
1476    assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"),
1477                 UnicodeString::fromUTF8(dest));
1478    dest.clear();
1479    CaseMap::utf8ToUpper("el", 0, u8"Πατάτα", sink, nullptr, errorCode);
1480    assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"),
1481                 UnicodeString::fromUTF8(dest));
1482#if !UCONFIG_NO_BREAK_ITERATION
1483    dest.clear();
1484    CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1485                         nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1486    assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"),
1487                 UnicodeString::fromUTF8(dest));
1488#endif
1489    dest.clear();
1490    CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"IßtanBul", sink, nullptr, errorCode);
1491    assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"),
1492                 UnicodeString::fromUTF8(dest));
1493}
1494
1495void StringCaseTest::TestLongUnicodeString() {
1496    // Code coverage for UnicodeString case mapping code handling
1497    // long strings or many changes in a string.
1498    UnicodeString s(TRUE,
1499        (const UChar *)
1500        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1501        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1502        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1503        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1504        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1505        u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51);
1506    UnicodeString expected(TRUE,
1507        (const UChar *)
1508        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1509        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1510        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1511        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1512        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1513        u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51);
1514    s.toUpper(Locale::getRoot());
1515    assertEquals("string length 306", expected, s);
1516}
1517
1518#if !UCONFIG_NO_BREAK_ITERATION
1519void StringCaseTest::TestBug13127() {
1520    // Test case crashed when the bug was present.
1521    const char16_t *s16 = u"日本語";
1522    UnicodeString s(TRUE, s16, -1);
1523    s.toTitle(0, Locale::getEnglish());
1524}
1525
1526void StringCaseTest::TestInPlaceTitle() {
1527    // Similar to TestBug13127. u_strToTitle() can modify the buffer in-place.
1528    IcuTestErrorCode errorCode(*this, "TestInPlaceTitle");
1529    char16_t s[32] = u"ß ß ß日本語 abcdef";
1530    const char16_t *expected = u"Ss Ss Ss日本語 Abcdef";
1531    int32_t length = u_strToTitle(s, UPRV_LENGTHOF(s), s, -1, nullptr, "", errorCode);
1532    assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length);
1533    assertEquals("u_strToTitle(in-place)", expected, s);
1534}
1535#endif
1536