1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9#include "ustrtest.h"
10#include "unicode/appendable.h"
11#include "unicode/std_string.h"
12#include "unicode/unistr.h"
13#include "unicode/uchar.h"
14#include "unicode/ustring.h"
15#include "unicode/locid.h"
16#include "unicode/ucnv.h"
17#include "unicode/uenum.h"
18#include "unicode/utf16.h"
19#include "cmemory.h"
20#include "charstr.h"
21
22#if 0
23#include "unicode/ustream.h"
24
25#include <iostream>
26using namespace std;
27
28#endif
29
30UnicodeStringTest::~UnicodeStringTest() {}
31
32void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
33{
34    if (exec) logln("TestSuite UnicodeStringTest: ");
35    TESTCASE_AUTO_BEGIN;
36    TESTCASE_AUTO_CLASS(StringCaseTest);
37    TESTCASE_AUTO(TestBasicManipulation);
38    TESTCASE_AUTO(TestCompare);
39    TESTCASE_AUTO(TestExtract);
40    TESTCASE_AUTO(TestRemoveReplace);
41    TESTCASE_AUTO(TestSearching);
42    TESTCASE_AUTO(TestSpacePadding);
43    TESTCASE_AUTO(TestPrefixAndSuffix);
44    TESTCASE_AUTO(TestFindAndReplace);
45    TESTCASE_AUTO(TestBogus);
46    TESTCASE_AUTO(TestReverse);
47    TESTCASE_AUTO(TestMiscellaneous);
48    TESTCASE_AUTO(TestStackAllocation);
49    TESTCASE_AUTO(TestUnescape);
50    TESTCASE_AUTO(TestCountChar32);
51    TESTCASE_AUTO(TestStringEnumeration);
52    TESTCASE_AUTO(TestNameSpace);
53    TESTCASE_AUTO(TestUTF32);
54    TESTCASE_AUTO(TestUTF8);
55    TESTCASE_AUTO(TestReadOnlyAlias);
56    TESTCASE_AUTO(TestAppendable);
57    TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
58    TESTCASE_AUTO(TestSizeofUnicodeString);
59    TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
60    TESTCASE_AUTO(TestMoveSwap);
61    TESTCASE_AUTO_END;
62}
63
64void
65UnicodeStringTest::TestBasicManipulation()
66{
67    UnicodeString   test1("Now is the time for all men to come swiftly to the aid of the party.\n");
68    UnicodeString   expectedValue;
69    UnicodeString   *c;
70
71    c=(UnicodeString *)test1.clone();
72    test1.insert(24, "good ");
73    expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
74    if (test1 != expectedValue)
75        errln("insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
76
77    c->insert(24, "good ");
78    if(*c != expectedValue) {
79        errln("clone()->insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
80    }
81    delete c;
82
83    test1.remove(41, 8);
84    expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
85    if (test1 != expectedValue)
86        errln("remove() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
87
88    test1.replace(58, 6, "ir country");
89    expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
90    if (test1 != expectedValue)
91        errln("replace() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
92
93    UChar     temp[80];
94    test1.extract(0, 15, temp);
95
96    UnicodeString       test2(temp, 15);
97
98    expectedValue = "Now is the time";
99    if (test2 != expectedValue)
100        errln("extract() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
101
102    test2 += " for me to go!\n";
103    expectedValue = "Now is the time for me to go!\n";
104    if (test2 != expectedValue)
105        errln("operator+=() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
106
107    if (test1.length() != 70)
108        errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
109    if (test2.length() != 30)
110        errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
111
112    UnicodeString test3;
113    test3.append((UChar32)0x20402);
114    if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
115        errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
116    }
117    if(test3.length() != 2){
118        errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
119    }
120    test3.append((UChar32)0x0074);
121    if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
122        errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
123    }
124    if(test3.length() != 3){
125        errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
126    }
127
128    // test some UChar32 overloads
129    if( test3.setTo((UChar32)0x10330).length() != 2 ||
130        test3.insert(0, (UChar32)0x20100).length() != 4 ||
131        test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
132        (test3 = (UChar32)0x14001).length() != 2
133    ) {
134        errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
135    }
136
137    {
138        // test moveIndex32()
139        UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
140
141        if(
142            s.moveIndex32(2, -1)!=0 ||
143            s.moveIndex32(2, 1)!=4 ||
144            s.moveIndex32(2, 2)!=5 ||
145            s.moveIndex32(5, -2)!=2 ||
146            s.moveIndex32(0, -1)!=0 ||
147            s.moveIndex32(6, 1)!=6
148        ) {
149            errln("UnicodeString::moveIndex32() failed");
150        }
151
152        if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
153            errln("UnicodeString::getChar32Start() failed");
154        }
155
156        if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
157            errln("UnicodeString::getChar32Limit() failed");
158        }
159    }
160
161    {
162        // test new 2.2 constructors and setTo function that parallel Java's substring function.
163        UnicodeString src("Hello folks how are you?");
164        UnicodeString target1("how are you?");
165        if (target1 != UnicodeString(src, 12)) {
166            errln("UnicodeString(const UnicodeString&, int32_t) failed");
167        }
168        UnicodeString target2("folks");
169        if (target2 != UnicodeString(src, 6, 5)) {
170            errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
171        }
172        if (target1 != target2.setTo(src, 12)) {
173            errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
174        }
175    }
176
177    {
178        // op+ is new in ICU 2.8
179        UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
180        if(s!=UnicodeString("abcdefghi", "")) {
181            errln("operator+(UniStr, UniStr) failed");
182        }
183    }
184
185    {
186        // tests for Jitterbug 2360
187        // verify that APIs with source pointer + length accept length == -1
188        // mostly test only where modified, only few functions did not already do this
189        if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
190            errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
191        }
192
193        UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
194        UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
195
196        if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
197            errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
198        }
199        if(t.length()!=u_strlen(buffer)) {
200            errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
201        }
202
203        if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
204            errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
205        }
206        if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
207            errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
208        }
209
210        buffer[u_strlen(buffer)]=0xe4;
211        UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
212        if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
213            errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
214        }
215        if(u.length()!=UPRV_LENGTHOF(buffer)) {
216            errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
217        }
218
219        static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
220        UConverter *cnv;
221        UErrorCode errorCode=U_ZERO_ERROR;
222
223        cnv=ucnv_open("ISO-8859-1", &errorCode);
224        UnicodeString v(cs, -1, cnv, errorCode);
225        ucnv_close(cnv);
226        if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
227            errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
228        }
229    }
230
231#if U_CHARSET_IS_UTF8
232    {
233        // Test the hardcoded-UTF-8 UnicodeString optimizations.
234        static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
235        static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
236        UnicodeString from8a = UnicodeString((const char *)utf8);
237        UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
238        UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16));
239        if(from8a != from16 || from8b != from16) {
240            errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
241        }
242        char buffer[16];
243        int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
244        if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
245            errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
246        }
247        length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
248        if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
249            errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
250        }
251    }
252#endif
253}
254
255void
256UnicodeStringTest::TestCompare()
257{
258    UnicodeString   test1("this is a test");
259    UnicodeString   test2("this is a test");
260    UnicodeString   test3("this is a test of the emergency broadcast system");
261    UnicodeString   test4("never say, \"this is a test\"!!");
262
263    UnicodeString   test5((UChar)0x5000);
264    UnicodeString   test6((UChar)0x5100);
265
266    UChar         uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
267                 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
268    char            chars[] = "this is a test";
269
270    // test operator== and operator!=
271    if (test1 != test2 || test1 == test3 || test1 == test4)
272        errln("operator== or operator!= failed");
273
274    // test operator> and operator<
275    if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
276        !(test5 < test6)
277    ) {
278        errln("operator> or operator< failed");
279    }
280
281    // test operator>= and operator<=
282    if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
283        errln("operator>= or operator<= failed");
284
285    // test compare(UnicodeString)
286    if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
287        errln("compare(UnicodeString) failed");
288
289    //test compare(offset, length, UnicodeString)
290    if(test1.compare(0, 14, test2) != 0 ||
291        test3.compare(0, 14, test2) != 0 ||
292        test4.compare(12, 14, test2) != 0 ||
293        test3.compare(0, 18, test1) <=0  )
294        errln("compare(offset, length, UnicodeString) failes");
295
296    // test compare(UChar*)
297    if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
298        errln("compare(UChar*) failed");
299
300    // test compare(char*)
301    if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
302        errln("compare(char*) failed");
303
304    // test compare(UChar*, length)
305    if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
306        errln("compare(UChar*, length) failed");
307
308    // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
309    if (test1.compare(0, 14, test2, 0, 14) != 0
310    || test1.compare(0, 14, test3, 0, 14) != 0
311    || test1.compare(0, 14, test4, 12, 14) != 0)
312        errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
313
314    if (test1.compare(10, 4, test2, 0, 4) >= 0
315    || test1.compare(10, 4, test3, 22, 9) <= 0
316    || test1.compare(10, 4, test4, 22, 4) != 0)
317        errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
318
319    // test compareBetween
320    if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
321                    || test1.compareBetween(0, 14, test4, 12, 26) != 0)
322        errln("compareBetween failed");
323
324    if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
325                    || test1.compareBetween(10, 14, test4, 22, 26) != 0)
326        errln("compareBetween failed");
327
328    // test compare() etc. with strings that share a buffer but are not equal
329    test2=test1; // share the buffer, length() too large for the stackBuffer
330    test2.truncate(1); // change only the length, not the buffer
331    if( test1==test2 || test1<=test2 ||
332        test1.compare(test2)<=0 ||
333        test1.compareCodePointOrder(test2)<=0 ||
334        test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
335        test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
336        test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
337        test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
338    ) {
339        errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
340    }
341
342    /* test compareCodePointOrder() */
343    {
344        /* these strings are in ascending order */
345        static const UChar strings[][4]={
346            { 0x61, 0 },                    /* U+0061 */
347            { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
348            { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
349            { 0xd800, 0 },                  /* U+d800 */
350            { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
351            { 0xdfff, 0 },                  /* U+dfff */
352            { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
353            { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
354            { 0xd800, 0xdc02, 0 },          /* U+10002 */
355            { 0xd84d, 0xdc56, 0 }           /* U+23456 */
356        };
357        UnicodeString u[20]; // must be at least as long as strings[]
358        int32_t i;
359
360        for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
361            u[i]=UnicodeString(TRUE, strings[i], -1);
362        }
363
364        for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
365            if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
366                errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
367            }
368        }
369    }
370
371    /* test caseCompare() */
372    {
373        static const UChar
374        _mixed[]=               { 0x61, 0x42, 0x131, 0x3a3, 0xdf,       0x130,       0x49,  0xfb03,           0xd93f, 0xdfff, 0 },
375        _otherDefault[]=        { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69,  0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
376        _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69,        0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
377        _different[]=           { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130,       0x49,  0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
378
379        UnicodeString
380            mixed(TRUE, _mixed, -1),
381            otherDefault(TRUE, _otherDefault, -1),
382            otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
383            different(TRUE, _different, -1);
384
385        int8_t result;
386
387        /* test caseCompare() */
388        result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
389        if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
390            errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
391        }
392        result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
393        if(result!=0) {
394            errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
395        }
396        result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
397        if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
398            errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
399        }
400
401        /* test caseCompare() */
402        result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
403        if(result<=0) {
404            errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
405        }
406
407        /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
408        result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
409        if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
410            errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
411        }
412
413        /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
414        result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
415        if(result<=0) {
416            errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
417        }
418    }
419
420    // test that srcLength=-1 is handled in functions that
421    // take input const UChar */int32_t srcLength (j785)
422    {
423        static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
424        UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
425
426        if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
427            errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
428        }
429
430        if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
431            errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
432        }
433
434        if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
435            errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
436        }
437
438        if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
439            errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
440        }
441
442        if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
443            errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
444        }
445
446        UnicodeString s2, s3;
447        s2.replace(0, 0, u+1, -1);
448        s3.replace(0, 0, u, 1, -1);
449        if(s.compare(1, 999, s2)!=0 || s2!=s3) {
450            errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
451        }
452    }
453}
454
455void
456UnicodeStringTest::TestExtract()
457{
458    UnicodeString  test1("Now is the time for all good men to come to the aid of their country.", "");
459    UnicodeString  test2;
460    UChar          test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
461    char           test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
462    UnicodeString  test5;
463    char           test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
464
465    test1.extract(11, 12, test2);
466    test1.extract(11, 12, test3);
467    if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
468        errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
469    }
470
471    // test proper pinning in extractBetween()
472    test1.extractBetween(-3, 7, test5);
473    if(test5!=UNICODE_STRING("Now is ", 7)) {
474        errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
475    }
476
477    test1.extractBetween(11, 23, test5);
478    if (test1.extract(60, 71, test6) != 9) {
479        errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
480    }
481    if (test1.extract(11, 12, test6) != 12) {
482        errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
483    }
484
485    // convert test4 back to Unicode for comparison
486    UnicodeString test4b(test4, 12);
487
488    if (test1.extract(11, 12, (char *)NULL) != 12) {
489        errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
490    }
491    if (test1.extract(11, -1, test6) != 0) {
492        errln("UnicodeString.extract(-1) failed to stop reading the string.");
493    }
494
495    for (int32_t i = 0; i < 12; i++) {
496        if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
497            errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
498            break;
499        }
500        if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
501            errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
502            break;
503        }
504        if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
505            errln(UnicodeString("extracting into an array of char failed at position ") + i);
506            break;
507        }
508        if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
509            errln(UnicodeString("extracting with extractBetween failed at position ") + i);
510            break;
511        }
512    }
513
514    // test preflighting and overflows with invariant conversion
515    if (test1.extract(0, 10, (char *)NULL, "") != 10) {
516        errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
517    }
518
519    test4[2] = (char)0xff;
520    if (test1.extract(0, 10, test4, 2, "") != 10) {
521        errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
522    }
523    if (test4[2] != (char)0xff) {
524        errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
525    }
526
527    {
528        // test new, NUL-terminating extract() function
529        UnicodeString s("terminate", "");
530        UChar dest[20]={
531            0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
532            0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
533        };
534        UErrorCode errorCode;
535        int32_t length;
536
537        errorCode=U_ZERO_ERROR;
538        length=s.extract((UChar *)NULL, 0, errorCode);
539        if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
540            errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
541        }
542
543        errorCode=U_ZERO_ERROR;
544        length=s.extract(dest, s.length()-1, errorCode);
545        if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
546            errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
547                length, u_errorName(errorCode), s.length());
548        }
549
550        errorCode=U_ZERO_ERROR;
551        length=s.extract(dest, s.length(), errorCode);
552        if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
553            errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
554                length, u_errorName(errorCode), s.length());
555        }
556        if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
557            errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
558        }
559
560        errorCode=U_ZERO_ERROR;
561        length=s.extract(dest, s.length()+1, errorCode);
562        if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
563            errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
564                length, u_errorName(errorCode), s.length());
565        }
566        if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
567            errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
568        }
569    }
570
571    {
572        // test new UConverter extract() and constructor
573        UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
574        char buffer[32];
575        static const char expect[]={
576            (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
577            (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
578            (char)0xc3, (char)0x84,
579            (char)0xe1, (char)0xbb, (char)0x90
580        };
581        UErrorCode errorCode=U_ZERO_ERROR;
582        UConverter *cnv=ucnv_open("UTF-8", &errorCode);
583        int32_t length;
584
585        if(U_SUCCESS(errorCode)) {
586            // test preflighting
587            if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
588                errorCode!=U_BUFFER_OVERFLOW_ERROR
589            ) {
590                errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
591                      length, u_errorName(errorCode));
592            }
593            errorCode=U_ZERO_ERROR;
594            if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
595                errorCode!=U_BUFFER_OVERFLOW_ERROR
596            ) {
597                errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
598                      length, u_errorName(errorCode));
599            }
600
601            // try error cases
602            errorCode=U_ZERO_ERROR;
603            if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
604                errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
605            }
606            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
607            if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
608                errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
609            }
610            errorCode=U_ZERO_ERROR;
611
612            // extract for real
613            if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
614                uprv_memcmp(buffer, expect, 13)!=0 ||
615                buffer[13]!=0 ||
616                U_FAILURE(errorCode)
617            ) {
618                errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
619                      length, u_errorName(errorCode));
620            }
621            // Test again with just the converter name.
622            if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
623                uprv_memcmp(buffer, expect, 13)!=0 ||
624                buffer[13]!=0 ||
625                U_FAILURE(errorCode)
626            ) {
627                errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
628                      length, u_errorName(errorCode));
629            }
630
631            // try the constructor
632            UnicodeString t(expect, sizeof(expect), cnv, errorCode);
633            if(U_FAILURE(errorCode) || s!=t) {
634                errln("UnicodeString(UConverter) conversion failed (%s)",
635                      u_errorName(errorCode));
636            }
637
638            ucnv_close(cnv);
639        }
640    }
641}
642
643void
644UnicodeStringTest::TestRemoveReplace()
645{
646    UnicodeString   test1("The rain in Spain stays mainly on the plain");
647    UnicodeString   test2("eat SPAMburgers!");
648    UChar         test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
649    char            test4[] = "SPAM";
650    UnicodeString&  test5 = test1;
651
652    test1.replace(4, 4, test2, 4, 4);
653    test1.replace(12, 5, test3, 4);
654    test3[4] = 0;
655    test1.replace(17, 4, test3);
656    test1.replace(23, 4, test4);
657    test1.replaceBetween(37, 42, test2, 4, 8);
658
659    if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
660        errln("One of the replace methods failed:\n"
661              "  expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
662              "  got \"" + test1 + "\"");
663
664    test1.remove(21, 1);
665    test1.removeBetween(26, 28);
666
667    if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
668        errln("One of the remove methods failed:\n"
669              "  expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
670              "  got \"" + test1 + "\"");
671
672    for (int32_t i = 0; i < test1.length(); i++) {
673        if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
674            test1.setCharAt(i, 0x78);
675        }
676    }
677
678    if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
679        errln("One of the remove methods failed:\n"
680              "  expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
681              "  got \"" + test1 + "\"");
682
683    test1.remove();
684    if (test1.length() != 0)
685        errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
686}
687
688void
689UnicodeStringTest::TestSearching()
690{
691    UnicodeString test1("test test ttest tetest testesteststt");
692    UnicodeString test2("test");
693    UChar testChar = 0x74;
694
695    UChar32 testChar32 = 0x20402;
696    UChar testData[]={
697        //   0       1       2       3       4       5       6       7
698        0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
699
700        //   8       9      10      11      12      13      14      15
701        0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
702
703        //  16      17      18      19
704        0xdc02, 0xd841, 0x0073, 0x0000
705    };
706    UnicodeString test3(testData);
707    UnicodeString test4(testChar32);
708
709    uint16_t occurrences = 0;
710    int32_t startPos = 0;
711    for ( ;
712          startPos != -1 && startPos < test1.length();
713          (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
714        ;
715    if (occurrences != 6)
716        errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
717
718    for ( occurrences = 0, startPos = 10;
719          startPos != -1 && startPos < test1.length();
720          (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
721        ;
722    if (occurrences != 4)
723        errln(UnicodeString("indexOf with starting offset failed: "
724                            "expected to find 4 occurrences, found ") + occurrences);
725
726    int32_t endPos = 28;
727    for ( occurrences = 0, startPos = 5;
728          startPos != -1 && startPos < test1.length();
729          (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
730        ;
731    if (occurrences != 4)
732        errln(UnicodeString("indexOf with starting and ending offsets failed: "
733                            "expected to find 4 occurrences, found ") + occurrences);
734
735    //using UChar32 string
736    for ( startPos=0, occurrences=0;
737          startPos != -1 && startPos < test3.length();
738          (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
739        ;
740    if (occurrences != 4)
741        errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
742
743    for ( startPos=10, occurrences=0;
744          startPos != -1 && startPos < test3.length();
745          (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
746        ;
747    if (occurrences != 2)
748        errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
749    //---
750
751    for ( occurrences = 0, startPos = 0;
752          startPos != -1 && startPos < test1.length();
753          (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
754        ;
755    if (occurrences != 16)
756        errln(UnicodeString("indexOf with character failed: "
757                            "expected to find 16 occurrences, found ") + occurrences);
758
759    for ( occurrences = 0, startPos = 10;
760          startPos != -1 && startPos < test1.length();
761          (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
762        ;
763    if (occurrences != 12)
764        errln(UnicodeString("indexOf with character & start offset failed: "
765                            "expected to find 12 occurrences, found ") + occurrences);
766
767    for ( occurrences = 0, startPos = 5, endPos = 28;
768          startPos != -1 && startPos < test1.length();
769          (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
770        ;
771    if (occurrences != 10)
772        errln(UnicodeString("indexOf with character & start & end offsets failed: "
773                            "expected to find 10 occurrences, found ") + occurrences);
774
775    //testing for UChar32
776    UnicodeString subString;
777    for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
778        subString.append(test3, startPos, test3.length());
779        if(subString.indexOf(testChar32) != -1 ){
780             ++occurrences;
781        }
782        subString.remove();
783    }
784    if (occurrences != 14)
785        errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
786
787    for ( occurrences = 0, startPos = 0;
788          startPos != -1 && startPos < test3.length();
789          (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
790        ;
791    if (occurrences != 4)
792        errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
793
794    endPos=test3.length();
795    for ( occurrences = 0, startPos = 5;
796          startPos != -1 && startPos < test3.length();
797          (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
798        ;
799    if (occurrences != 3)
800        errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
801    //---
802
803    if(test1.lastIndexOf(test2)!=29) {
804        errln("test1.lastIndexOf(test2)!=29");
805    }
806
807    if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
808        errln("test1.lastIndexOf(test2, start) failed");
809    }
810
811    for ( occurrences = 0, startPos = 32;
812          startPos != -1;
813          (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
814        ;
815    if (occurrences != 4)
816        errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
817                            "expected to find 4 occurrences, found ") + occurrences);
818
819    for ( occurrences = 0, startPos = 32;
820          startPos != -1;
821          (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
822        ;
823    if (occurrences != 11)
824        errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
825                            "expected to find 11 occurrences, found ") + occurrences);
826
827    //testing UChar32
828    startPos=test3.length();
829    for ( occurrences = 0;
830          startPos != -1;
831          (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
832        ;
833    if (occurrences != 3)
834        errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
835
836
837    for ( occurrences = 0, endPos = test3.length();  endPos > 0; endPos -= 1){
838        subString.remove();
839        subString.append(test3, 0, endPos);
840        if(subString.lastIndexOf(testChar32) != -1 ){
841            ++occurrences;
842        }
843    }
844    if (occurrences != 18)
845        errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
846    //---
847
848    // test that indexOf(UChar32) and lastIndexOf(UChar32)
849    // do not find surrogate code points when they are part of matched pairs
850    // (= part of supplementary code points)
851    // Jitterbug 1542
852    if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
853        errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
854    }
855    if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
856        UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
857        test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
858    ) {
859        errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
860    }
861}
862
863void
864UnicodeStringTest::TestSpacePadding()
865{
866    UnicodeString test1("hello");
867    UnicodeString test2("   there");
868    UnicodeString test3("Hi!  How ya doin'?  Beautiful day, isn't it?");
869    UnicodeString test4;
870    UBool returnVal;
871    UnicodeString expectedValue;
872
873    returnVal = test1.padLeading(15);
874    expectedValue = "          hello";
875    if (returnVal == FALSE || test1 != expectedValue)
876        errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
877
878    returnVal = test2.padTrailing(15);
879    expectedValue = "   there       ";
880    if (returnVal == FALSE || test2 != expectedValue)
881        errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
882
883    expectedValue = test3;
884    returnVal = test3.padTrailing(15);
885    if (returnVal == TRUE || test3 != expectedValue)
886        errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
887
888    expectedValue = "hello";
889    test4.setTo(test1).trim();
890
891    if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
892        errln("trim(UnicodeString&) failed");
893
894    test1.trim();
895    if (test1 != expectedValue)
896        errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
897
898    test2.trim();
899    expectedValue = "there";
900    if (test2 != expectedValue)
901        errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
902
903    test3.trim();
904    expectedValue = "Hi!  How ya doin'?  Beautiful day, isn't it?";
905    if (test3 != expectedValue)
906        errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
907
908    returnVal = test1.truncate(15);
909    expectedValue = "hello";
910    if (returnVal == TRUE || test1 != expectedValue)
911        errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
912
913    returnVal = test2.truncate(15);
914    expectedValue = "there";
915    if (returnVal == TRUE || test2 != expectedValue)
916        errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
917
918    returnVal = test3.truncate(15);
919    expectedValue = "Hi!  How ya doi";
920    if (returnVal == FALSE || test3 != expectedValue)
921        errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
922}
923
924void
925UnicodeStringTest::TestPrefixAndSuffix()
926{
927    UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
928    UnicodeString test2("Now");
929    UnicodeString test3("country.");
930    UnicodeString test4("count");
931
932    if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
933        errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
934    }
935
936    if (test1.startsWith(test3) ||
937        test1.startsWith(test3.getBuffer(), test3.length()) ||
938        test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
939    ) {
940        errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
941    }
942
943    if (test1.endsWith(test2)) {
944        errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
945    }
946
947    if (!test1.endsWith(test3)) {
948        errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
949    }
950    if (!test1.endsWith(test3, 0, INT32_MAX)) {
951        errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
952    }
953
954    if(!test1.endsWith(test3.getBuffer(), test3.length())) {
955        errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
956    }
957    if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
958        errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
959    }
960
961    if (!test3.startsWith(test4)) {
962        errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
963    }
964
965    if (test4.startsWith(test3)) {
966        errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
967    }
968}
969
970void
971UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
972    UnicodeString test("abcde");
973    const UChar ab[] = { 0x61, 0x62, 0 };
974    const UChar de[] = { 0x64, 0x65, 0 };
975    assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
976    assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
977    assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
978    assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
979}
980
981void
982UnicodeStringTest::TestFindAndReplace()
983{
984    UnicodeString test1("One potato, two potato, three potato, four\n");
985    UnicodeString test2("potato");
986    UnicodeString test3("MISSISSIPPI");
987
988    UnicodeString expectedValue;
989
990    test1.findAndReplace(test2, test3);
991    expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
992    if (test1 != expectedValue)
993        errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
994    test1.findAndReplace(2, 32, test3, test2);
995    expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
996    if (test1 != expectedValue)
997        errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
998}
999
1000void
1001UnicodeStringTest::TestReverse()
1002{
1003    UnicodeString test("backwards words say to used I");
1004
1005    test.reverse();
1006    test.reverse(2, 4);
1007    test.reverse(7, 2);
1008    test.reverse(10, 3);
1009    test.reverse(14, 5);
1010    test.reverse(20, 9);
1011
1012    if (test != "I used to say words backwards")
1013        errln("reverse() failed:  Expected \"I used to say words backwards\",\n got \""
1014            + test + "\"");
1015
1016    test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1017    test.reverse();
1018    if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1019        errln("reverse() failed with supplementary characters");
1020    }
1021
1022    // Test case for ticket #8091:
1023    // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1024    // an odd-length string that contains no other lead surrogates.
1025    test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1026    UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1027    test.reverse();
1028    if(test!=expected) {
1029        errln("reverse() failed with only lead surrogate in the middle");
1030    }
1031}
1032
1033void
1034UnicodeStringTest::TestMiscellaneous()
1035{
1036    UnicodeString   test1("This is a test");
1037    UnicodeString   test2("This is a test");
1038    UnicodeString   test3("Me too!");
1039
1040    // test getBuffer(minCapacity) and releaseBuffer()
1041    test1=UnicodeString(); // make sure that it starts with its stackBuffer
1042    UChar *p=test1.getBuffer(20);
1043    if(test1.getCapacity()<20) {
1044        errln("UnicodeString::getBuffer(20).getCapacity()<20");
1045    }
1046
1047    test1.append((UChar)7); // must not be able to modify the string here
1048    test1.setCharAt(3, 7);
1049    test1.reverse();
1050    if( test1.length()!=0 ||
1051        test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1052        test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1053    ) {
1054        errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1055    }
1056
1057    p[0]=1;
1058    p[1]=2;
1059    p[2]=3;
1060    test1.releaseBuffer(3);
1061    test1.append((UChar)4);
1062
1063    if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1064        errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1065    }
1066
1067    // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1068    test1.releaseBuffer(1);
1069    if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1070        errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1071    }
1072
1073    // test getBuffer(const)
1074    const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1075    if( test1.length()!=4 ||
1076        q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1077        r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1078    ) {
1079        errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1080    }
1081
1082    // test releaseBuffer() with a NUL-terminated buffer
1083    test1.getBuffer(20)[2]=0;
1084    test1.releaseBuffer(); // implicit -1
1085    if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1086        errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1087    }
1088
1089    // test releaseBuffer() with a non-NUL-terminated buffer
1090    p=test1.getBuffer(256);
1091    for(int32_t i=0; i<test1.getCapacity(); ++i) {
1092        p[i]=(UChar)1;      // fill the buffer with all non-NUL code units
1093    }
1094    test1.releaseBuffer();  // implicit -1
1095    if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1096        errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1097    }
1098
1099    // test getTerminatedBuffer()
1100    test1=UnicodeString("This is another test.", "");
1101    test2=UnicodeString("This is another test.", "");
1102    q=test1.getTerminatedBuffer();
1103    if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1104        errln("getTerminatedBuffer()[length]!=0");
1105    }
1106
1107    const UChar u[]={ 5, 6, 7, 8, 0 };
1108    test1.setTo(FALSE, u, 3);
1109    q=test1.getTerminatedBuffer();
1110    if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1111        errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1112    }
1113
1114    test1.setTo(TRUE, u, -1);
1115    q=test1.getTerminatedBuffer();
1116    if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1117        errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1118    }
1119
1120    test1=UNICODE_STRING("la", 2);
1121    test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1);
1122    if(test1!=UNICODE_STRING("la lila", 7)) {
1123        errln("UnicodeString::append(const UChar *, start, length) failed");
1124    }
1125
1126    test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX);
1127    if(test1!=UNICODE_STRING("la dudum lila", 13)) {
1128        errln("UnicodeString::insert(start, const UniStr &, start, length) failed");
1129    }
1130
1131    static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1132    test1.insert(9, ucs, -1);
1133    if(test1!=UNICODE_STRING("la dudum hm lila", 16)) {
1134        errln("UnicodeString::insert(start, const UChar *, length) failed");
1135    }
1136
1137    test1.replace(9, 2, (UChar)0x2b);
1138    if(test1!=UNICODE_STRING("la dudum + lila", 15)) {
1139        errln("UnicodeString::replace(start, length, UChar) failed");
1140    }
1141
1142    if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1143        errln("UnicodeString::hasMetaData() returns TRUE");
1144    }
1145
1146    // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1147    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1148    test1.truncate(36);  // ensure length()<getCapacity()
1149    test2=test1;  // share the buffer
1150    test1.truncate(5);
1151    if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1152        errln("UnicodeString(shared buffer).truncate() failed");
1153    }
1154    if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1155        errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1156              "modified another copy of the string!");
1157    }
1158    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1159    test1.truncate(36);  // ensure length()<getCapacity()
1160    test2=test1;  // share the buffer
1161    test1.remove();
1162    if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1163        errln("UnicodeString(shared buffer).remove() failed");
1164    }
1165    if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1166        errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1167              "modified another copy of the string!");
1168    }
1169
1170    // ticket #9740
1171    test1.setTo(TRUE, ucs, 3);
1172    assertEquals("length of read-only alias", 3, test1.length());
1173    test1.trim();
1174    assertEquals("length of read-only alias after trim()", 2, test1.length());
1175    assertEquals("length of terminated buffer of read-only alias + trim()",
1176                 2, u_strlen(test1.getTerminatedBuffer()));
1177}
1178
1179void
1180UnicodeStringTest::TestStackAllocation()
1181{
1182    UChar           testString[] ={
1183        0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1184    UChar           guardWord = 0x4DED;
1185    UnicodeString*  test = 0;
1186
1187    test = new  UnicodeString(testString);
1188    if (*test != "This is a crazy test.")
1189        errln("Test string failed to initialize properly.");
1190    if (guardWord != 0x04DED)
1191        errln("Test string initialization overwrote guard word!");
1192
1193    test->insert(8, "only ");
1194    test->remove(15, 6);
1195    if (*test != "This is only a test.")
1196        errln("Manipulation of test string failed to work right.");
1197    if (guardWord != 0x4DED)
1198        errln("Manipulation of test string overwrote guard word!");
1199
1200    // we have to deinitialize and release the backing store by calling the destructor
1201    // explicitly, since we can't overload operator delete
1202    delete test;
1203
1204    UChar workingBuffer[] = {
1205        0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1206        0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1207        0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1208        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1209        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1210    UChar guardWord2 = 0x4DED;
1211
1212    test = new UnicodeString(workingBuffer, 35, 100);
1213    if (*test != "Now is the time for all men to come")
1214        errln("Stack-allocated backing store failed to initialize correctly.");
1215    if (guardWord2 != 0x4DED)
1216        errln("Stack-allocated backing store overwrote guard word!");
1217
1218    test->insert(24, "good ");
1219    if (*test != "Now is the time for all good men to come")
1220        errln("insert() on stack-allocated UnicodeString didn't work right");
1221    if (guardWord2 != 0x4DED)
1222        errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1223
1224    if (workingBuffer[24] != 0x67)
1225        errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1226
1227    *test += " to the aid of their country.";
1228    if (*test != "Now is the time for all good men to come to the aid of their country.")
1229        errln("Stack-allocated UnicodeString overflow didn't work");
1230    if (guardWord2 != 0x4DED)
1231        errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1232
1233    *test = "ha!";
1234    if (*test != "ha!")
1235        errln("Assignment to stack-allocated UnicodeString didn't work");
1236    if (workingBuffer[0] != 0x4e)
1237        errln("Change to UnicodeString after overflow are still affecting original buffer");
1238    if (guardWord2 != 0x4DED)
1239        errln("Change to UnicodeString after overflow overwrote guard word!");
1240
1241    // test read-only aliasing with setTo()
1242    workingBuffer[0] = 0x20ac;
1243    workingBuffer[1] = 0x125;
1244    workingBuffer[2] = 0;
1245    test->setTo(TRUE, workingBuffer, 2);
1246    if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1247        errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1248    }
1249
1250    UnicodeString *c=(UnicodeString *)test->clone();
1251
1252    workingBuffer[1] = 0x109;
1253    if(test->charAt(1) != 0x109) {
1254        errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1255    }
1256
1257    if(c->length() != 2 || c->charAt(1) != 0x125) {
1258        errln("clone(alias) did not copy the buffer");
1259    }
1260    delete c;
1261
1262    test->setTo(TRUE, workingBuffer, -1);
1263    if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1264        errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1265    }
1266
1267    test->setTo(FALSE, workingBuffer, -1);
1268    if(!test->isBogus()) {
1269        errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1270    }
1271
1272    delete test;
1273
1274    test=new UnicodeString();
1275    UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1276    test->setTo(buffer, 4, 10);
1277    if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1278        test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1279        errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1280    }
1281    delete test;
1282
1283
1284    // test the UChar32 constructor
1285    UnicodeString c32Test((UChar32)0x10ff2a);
1286    if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1287        c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1288    ) {
1289        errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1290    }
1291
1292    // test the (new) capacity constructor
1293    UnicodeString capTest(5, (UChar32)0x2a, 5);
1294    if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1295        capTest.char32At(0) != 0x2a ||
1296        capTest.char32At(4) != 0x2a
1297    ) {
1298        errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1299    }
1300
1301    capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1302    if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1303        capTest.char32At(0) != 0x10ff2a ||
1304        capTest.char32At(4) != 0x10ff2a
1305    ) {
1306        errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1307    }
1308
1309    capTest = UnicodeString(5, (UChar32)0, 0);
1310    if(capTest.length() != 0) {
1311        errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1312    }
1313}
1314
1315/**
1316 * Test the unescape() function.
1317 */
1318void UnicodeStringTest::TestUnescape(void) {
1319    UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1320    UnicodeString OUT("abc");
1321    OUT.append((UChar)0x4567);
1322    OUT.append(" ");
1323    OUT.append((UChar)0xA);
1324    OUT.append((UChar)0xD);
1325    OUT.append(" ");
1326    OUT.append((UChar32)0x00101234);
1327    OUT.append("xyz");
1328    OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1329    UnicodeString result = IN.unescape();
1330    if (result != OUT) {
1331        errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1332              prettify(result) + ", expected " +
1333              prettify(OUT));
1334    }
1335
1336    // test that an empty string is returned in case of an error
1337    if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1338        errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1339    }
1340}
1341
1342/* test code point counting functions --------------------------------------- */
1343
1344/* reference implementation of UnicodeString::hasMoreChar32Than() */
1345static int32_t
1346_refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1347    int32_t count=s.countChar32(start, length);
1348    return count>number;
1349}
1350
1351/* compare the real function against the reference */
1352void
1353UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1354    if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1355        errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1356                start, length, number, s.hasMoreChar32Than(start, length, number));
1357    }
1358}
1359
1360void
1361UnicodeStringTest::TestCountChar32(void) {
1362    {
1363        UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1364
1365        // test countChar32()
1366        // note that this also calls and tests u_countChar32(length>=0)
1367        if(
1368            s.countChar32()!=4 ||
1369            s.countChar32(1)!=4 ||
1370            s.countChar32(2)!=3 ||
1371            s.countChar32(2, 3)!=2 ||
1372            s.countChar32(2, 0)!=0
1373        ) {
1374            errln("UnicodeString::countChar32() failed");
1375        }
1376
1377        // NUL-terminate the string buffer and test u_countChar32(length=-1)
1378        const UChar *buffer=s.getTerminatedBuffer();
1379        if(
1380            u_countChar32(buffer, -1)!=4 ||
1381            u_countChar32(buffer+1, -1)!=4 ||
1382            u_countChar32(buffer+2, -1)!=3 ||
1383            u_countChar32(buffer+3, -1)!=3 ||
1384            u_countChar32(buffer+4, -1)!=2 ||
1385            u_countChar32(buffer+5, -1)!=1 ||
1386            u_countChar32(buffer+6, -1)!=0
1387        ) {
1388            errln("u_countChar32(length=-1) failed");
1389        }
1390
1391        // test u_countChar32() with bad input
1392        if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1393            errln("u_countChar32(bad input) failed (returned non-zero counts)");
1394        }
1395    }
1396
1397    /* test data and variables for hasMoreChar32Than() */
1398    static const UChar str[]={
1399        0x61, 0x62, 0xd800, 0xdc00,
1400        0xd801, 0xdc01, 0x63, 0xd802,
1401        0x64, 0xdc03, 0x65, 0x66,
1402        0xd804, 0xdc04, 0xd805, 0xdc05,
1403        0x67
1404    };
1405    UnicodeString string(str, UPRV_LENGTHOF(str));
1406    int32_t start, length, number;
1407
1408    /* test hasMoreChar32Than() */
1409    for(length=string.length(); length>=0; --length) {
1410        for(start=0; start<=length; ++start) {
1411            for(number=-1; number<=((length-start)+2); ++number) {
1412                _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1413            }
1414        }
1415    }
1416
1417    /* test hasMoreChar32Than() with pinning */
1418    for(start=-1; start<=string.length()+1; ++start) {
1419        for(number=-1; number<=((string.length()-start)+2); ++number) {
1420            _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1421        }
1422    }
1423
1424    /* test hasMoreChar32Than() with a bogus string */
1425    string.setToBogus();
1426    for(length=-1; length<=1; ++length) {
1427        for(start=-1; start<=length; ++start) {
1428            for(number=-1; number<=((length-start)+2); ++number) {
1429                _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1430            }
1431        }
1432    }
1433}
1434
1435void
1436UnicodeStringTest::TestBogus() {
1437    UnicodeString   test1("This is a test");
1438    UnicodeString   test2("This is a test");
1439    UnicodeString   test3("Me too!");
1440
1441    // test isBogus() and setToBogus()
1442    if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
1443        errln("A string returned TRUE for isBogus()!");
1444    }
1445
1446    // NULL pointers are treated like empty strings
1447    // use other illegal arguments to make a bogus string
1448    test3.setTo(FALSE, test1.getBuffer(), -2);
1449    if(!test3.isBogus()) {
1450        errln("A bogus string returned FALSE for isBogus()!");
1451    }
1452    if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
1453        errln("hashCode() failed");
1454    }
1455    if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
1456        errln("bogus.getBuffer()!=0");
1457    }
1458    if (test1.indexOf(test3) != -1) {
1459        errln("bogus.indexOf() != -1");
1460    }
1461    if (test1.lastIndexOf(test3) != -1) {
1462        errln("bogus.lastIndexOf() != -1");
1463    }
1464    if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
1465        errln("caseCompare() doesn't work with bogus strings");
1466    }
1467    if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
1468        errln("compareCodePointOrder() doesn't work with bogus strings");
1469    }
1470
1471    // verify that non-assignment modifications fail and do not revive a bogus string
1472    test3.setToBogus();
1473    test3.append((UChar)0x61);
1474    if(!test3.isBogus() || test3.getBuffer()!=0) {
1475        errln("bogus.append('a') worked but must not");
1476    }
1477
1478    test3.setToBogus();
1479    test3.findAndReplace(UnicodeString((UChar)0x61), test2);
1480    if(!test3.isBogus() || test3.getBuffer()!=0) {
1481        errln("bogus.findAndReplace() worked but must not");
1482    }
1483
1484    test3.setToBogus();
1485    test3.trim();
1486    if(!test3.isBogus() || test3.getBuffer()!=0) {
1487        errln("bogus.trim() revived bogus but must not");
1488    }
1489
1490    test3.setToBogus();
1491    test3.remove(1);
1492    if(!test3.isBogus() || test3.getBuffer()!=0) {
1493        errln("bogus.remove(1) revived bogus but must not");
1494    }
1495
1496    test3.setToBogus();
1497    if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
1498        errln("bogus.setCharAt(0, 'b') worked but must not");
1499    }
1500
1501    test3.setToBogus();
1502    if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
1503        errln("bogus.truncate(1) revived bogus but must not");
1504    }
1505
1506    // verify that assignments revive a bogus string
1507    test3.setToBogus();
1508    if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
1509        errln("bogus.operator=() failed");
1510    }
1511
1512    test3.setToBogus();
1513    if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
1514        errln("bogus.fastCopyFrom() failed");
1515    }
1516
1517    test3.setToBogus();
1518    if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
1519        errln("bogus.setTo(UniStr) failed");
1520    }
1521
1522    test3.setToBogus();
1523    if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
1524        errln("bogus.setTo(UniStr, 0) failed");
1525    }
1526
1527    test3.setToBogus();
1528    if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
1529        errln("bogus.setTo(UniStr, 0, len) failed");
1530    }
1531
1532    test3.setToBogus();
1533    if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1534        errln("bogus.setTo(const UChar *, len) failed");
1535    }
1536
1537    test3.setToBogus();
1538    if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
1539        errln("bogus.setTo(UChar) failed");
1540    }
1541
1542    test3.setToBogus();
1543    if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
1544        errln("bogus.setTo(UChar32) failed");
1545    }
1546
1547    test3.setToBogus();
1548    if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1549        errln("bogus.setTo(readonly alias) failed");
1550    }
1551
1552    // writable alias to another string's buffer: very bad idea, just convenient for this test
1553    test3.setToBogus();
1554    if(!test3.isBogus() || test3.setTo((UChar *)test1.getBuffer(), test1.length(), test1.getCapacity()).isBogus() || test3!=test1) {
1555        errln("bogus.setTo(writable alias) failed");
1556    }
1557
1558    // verify simple, documented ways to turn a bogus string into an empty one
1559    test3.setToBogus();
1560    if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
1561        errln("bogus.operator=(UnicodeString()) failed");
1562    }
1563
1564    test3.setToBogus();
1565    if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
1566        errln("bogus.setTo(UnicodeString()) failed");
1567    }
1568
1569    test3.setToBogus();
1570    if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1571        errln("bogus.remove() failed");
1572    }
1573
1574    test3.setToBogus();
1575    if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1576        errln("bogus.remove(0, INT32_MAX) failed");
1577    }
1578
1579    test3.setToBogus();
1580    if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
1581        errln("bogus.truncate(0) failed");
1582    }
1583
1584    test3.setToBogus();
1585    if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
1586        errln("bogus.setTo((UChar32)-1) failed");
1587    }
1588
1589    static const UChar nul=0;
1590
1591    test3.setToBogus();
1592    if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
1593        errln("bogus.setTo(&nul, 0) failed");
1594    }
1595
1596    test3.setToBogus();
1597    if(!test3.isBogus() || test3.getBuffer()!=0) {
1598        errln("setToBogus() failed to make a string bogus");
1599    }
1600
1601    test3.setToBogus();
1602    if(test1.isBogus() || !(test1=test3).isBogus()) {
1603        errln("normal=bogus failed to make the left string bogus");
1604    }
1605
1606    // test that NULL primitive input string values are treated like
1607    // empty strings, not errors (bogus)
1608    test2.setTo((UChar32)0x10005);
1609    if(test2.insert(1, NULL, 1).length()!=2) {
1610        errln("UniStr.insert(...NULL...) should not modify the string but does");
1611    }
1612
1613    UErrorCode errorCode=U_ZERO_ERROR;
1614    UnicodeString
1615        test4((const UChar *)NULL),
1616        test5(TRUE, (const UChar *)NULL, 1),
1617        test6((UChar *)NULL, 5, 5),
1618        test7((const char *)NULL, 3, NULL, errorCode);
1619    if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
1620        errln("a constructor set to bogus for a NULL input string, should be empty");
1621    }
1622
1623    test4.setTo(NULL, 3);
1624    test5.setTo(TRUE, (const UChar *)NULL, 1);
1625    test6.setTo((UChar *)NULL, 5, 5);
1626    if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
1627        errln("a setTo() set to bogus for a NULL input string, should be empty");
1628    }
1629
1630    // test that bogus==bogus<any
1631    if(test1!=test3 || test1.compare(test3)!=0) {
1632        errln("bogus==bogus failed");
1633    }
1634
1635    test2.remove();
1636    if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
1637        errln("bogus<empty failed");
1638    }
1639}
1640
1641// StringEnumeration ------------------------------------------------------- ***
1642// most of StringEnumeration is tested elsewhere
1643// this test improves code coverage
1644
// Strings handed out, in order, by TestEnumeration and checked by
// TestStringEnumeration(): three one-character strings and two longer ones.
static const char *const testEnumStrings[] = {
    "a", "b", "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1653
1654class TestEnumeration : public StringEnumeration {
1655public:
1656    TestEnumeration() : i(0) {}
1657
1658    virtual int32_t count(UErrorCode& /*status*/) const {
1659        return UPRV_LENGTHOF(testEnumStrings);
1660    }
1661
1662    virtual const UnicodeString *snext(UErrorCode &status) {
1663        if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1664            unistr=UnicodeString(testEnumStrings[i++], "");
1665            return &unistr;
1666        }
1667
1668        return NULL;
1669    }
1670
1671    virtual void reset(UErrorCode& /*status*/) {
1672        i=0;
1673    }
1674
1675    static inline UClassID getStaticClassID() {
1676        return (UClassID)&fgClassID;
1677    }
1678    virtual UClassID getDynamicClassID() const {
1679        return getStaticClassID();
1680    }
1681
1682private:
1683    static const char fgClassID;
1684
1685    int32_t i;
1686};
1687
1688const char TestEnumeration::fgClassID=0;
1689
1690void
1691UnicodeStringTest::TestStringEnumeration() {
1692    UnicodeString s;
1693    TestEnumeration ten;
1694    int32_t i, length;
1695    UErrorCode status;
1696
1697    const UChar *pu;
1698    const char *pc;
1699
1700    // test the next() default implementation and ensureCharsCapacity()
1701    for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1702        status=U_ZERO_ERROR;
1703        pc=ten.next(&length, status);
1704        s=UnicodeString(testEnumStrings[i], "");
1705        if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1706            errln("StringEnumeration.next(%d) failed", i);
1707        }
1708    }
1709    status=U_ZERO_ERROR;
1710    if(ten.next(&length, status)!=NULL) {
1711        errln("StringEnumeration.next(done)!=NULL");
1712    }
1713
1714    // test the unext() default implementation
1715    ten.reset(status);
1716    for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1717        status=U_ZERO_ERROR;
1718        pu=ten.unext(&length, status);
1719        s=UnicodeString(testEnumStrings[i], "");
1720        if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1721            errln("StringEnumeration.unext(%d) failed", i);
1722        }
1723    }
1724    status=U_ZERO_ERROR;
1725    if(ten.unext(&length, status)!=NULL) {
1726        errln("StringEnumeration.unext(done)!=NULL");
1727    }
1728
1729    // test that the default clone() implementation works, and returns NULL
1730    if(ten.clone()!=NULL) {
1731        errln("StringEnumeration.clone()!=NULL");
1732    }
1733
1734    // test that uenum_openFromStringEnumeration() works
1735    // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1736    StringEnumeration *newTen = new TestEnumeration;
1737    status=U_ZERO_ERROR;
1738    UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1739    if (uten==NULL || U_FAILURE(status)) {
1740        errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1741        return;
1742    }
1743
1744    // test  uenum_next()
1745    for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1746        status=U_ZERO_ERROR;
1747        pc=uenum_next(uten, &length, &status);
1748        if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1749            errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1750        }
1751    }
1752    status=U_ZERO_ERROR;
1753    if(uenum_next(uten, &length, &status)!=NULL) {
1754        errln("File %s, line %d, uenum_next(done)!=NULL");
1755    }
1756
1757    // test the uenum_unext()
1758    uenum_reset(uten, &status);
1759    for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1760        status=U_ZERO_ERROR;
1761        pu=uenum_unext(uten, &length, &status);
1762        s=UnicodeString(testEnumStrings[i], "");
1763        if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1764            errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1765        }
1766    }
1767    status=U_ZERO_ERROR;
1768    if(uenum_unext(uten, &length, &status)!=NULL) {
1769        errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1770    }
1771
1772    uenum_close(uten);
1773}
1774
1775/*
1776 * Namespace test, to make sure that macros like UNICODE_STRING include the
1777 * namespace qualifier.
1778 *
1779 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1780 */
1781namespace bogus {
1782    class UnicodeString {
1783    public:
1784        enum EInvariant { kInvariant };
1785        UnicodeString() : i(1) {}
1786        UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
1787        UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1788) : i(length) {}
1789    private:
1790        int32_t i;
1791    };
1792}
1793
1794void
1795UnicodeStringTest::TestNameSpace() {
1796    // Provoke name collision unless the UnicodeString macros properly
1797    // qualify the icu::UnicodeString class.
1798    using namespace bogus;
1799
1800    // Use all UnicodeString macros from unistr.h.
1801    icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1802    icu::UnicodeString s2=UNICODE_STRING("def", 3);
1803    icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1804
1805    // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1806    icu::UnicodeString s4=s1+s2+s3;
1807    if(s4.length()!=9) {
1808        errln("Something wrong with UnicodeString::operator+().");
1809    }
1810}
1811
1812void
1813UnicodeStringTest::TestUTF32() {
1814    // Input string length US_STACKBUF_SIZE to cause overflow of the
1815    // initially chosen fStackBuffer due to supplementary characters.
1816    static const UChar32 utf32[] = {
1817        0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1818        0x10000, 0x20000, 0xe0000, 0x10ffff
1819    };
1820    static const UChar expected_utf16[] = {
1821        0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1822        0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1823    };
1824    UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1825    UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1826    if(from32 != expected) {
1827        errln("UnicodeString::fromUTF32() did not create the expected string.");
1828    }
1829
1830    static const UChar utf16[] = {
1831        0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1832    };
1833    static const UChar32 expected_utf32[] = {
1834        0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1835    };
1836    UChar32 result32[16];
1837    UErrorCode errorCode = U_ZERO_ERROR;
1838    int32_t length32 =
1839        UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
1840        toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1841    if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1842        0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1843        result32[length32] != 0
1844    ) {
1845        errln("UnicodeString::toUTF32() did not create the expected string.");
1846    }
1847}
1848
1849class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1850public:
1851    TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1852            : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
1853    virtual void Flush() { calledFlush = TRUE; }
1854    UBool calledFlush;
1855};
1856
1857void
1858UnicodeStringTest::TestUTF8() {
1859    static const uint8_t utf8[] = {
1860        // Code points:
1861        // 0x41, 0xd900,
1862        // 0x61, 0xdc00,
1863        // 0x110000, 0x5a,
1864        // 0x50000, 0x7a,
1865        // 0x10000, 0x20000,
1866        // 0xe0000, 0x10ffff
1867        0x41, 0xed, 0xa4, 0x80,
1868        0x61, 0xed, 0xb0, 0x80,
1869        0xf4, 0x90, 0x80, 0x80, 0x5a,
1870        0xf1, 0x90, 0x80, 0x80, 0x7a,
1871        0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1872        0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1873    };
1874    static const UChar expected_utf16[] = {
1875        0x41, 0xfffd,
1876        0x61, 0xfffd,
1877        0xfffd, 0x5a,
1878        0xd900, 0xdc00, 0x7a,
1879        0xd800, 0xdc00, 0xd840, 0xdc00,
1880        0xdb40, 0xdc00, 0xdbff, 0xdfff
1881    };
1882    UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1883    UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1884
1885    if(from8 != expected) {
1886        errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1887    }
1888#if U_HAVE_STD_STRING
1889    std::string utf8_string((const char *)utf8, sizeof(utf8));
1890    UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1891    if(from8b != expected) {
1892        errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1893    }
1894#endif
1895
1896    static const UChar utf16[] = {
1897        0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1898    };
1899    static const uint8_t expected_utf8[] = {
1900        0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1901        0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1902    };
1903    UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16));
1904
1905    char buffer[64];
1906    TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1907    us.toUTF8(sink);
1908    if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1909        0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1910    ) {
1911        errln("UnicodeString::toUTF8() did not create the expected string.");
1912    }
1913    if(!sink.calledFlush) {
1914        errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1915    }
1916#if U_HAVE_STD_STRING
1917    // Initial contents for testing that toUTF8String() appends.
1918    std::string result8 = "-->";
1919    std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1920    // Use the return value just for testing.
1921    std::string &result8r = us.toUTF8String(result8);
1922    if(result8r != expected8 || &result8r != &result8) {
1923        errln("UnicodeString::toUTF8String() did not create the expected string.");
1924    }
1925#endif
1926}
1927
1928// Test if this compiler supports Return Value Optimization of unnamed temporary objects.
1929static UnicodeString wrapUChars(const UChar *uchars) {
1930    return UnicodeString(TRUE, uchars, -1);
1931}
1932
1933void
1934UnicodeStringTest::TestReadOnlyAlias() {
1935    UChar uchars[]={ 0x61, 0x62, 0 };
1936    UnicodeString alias(TRUE, uchars, 2);
1937    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1938        errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1939        return;
1940    }
1941    alias.truncate(1);
1942    if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1943        errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1944    }
1945    if(alias.getTerminatedBuffer()==uchars) {
1946        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1947              "did not allocate and copy as expected.");
1948    }
1949    if(uchars[1]!=0x62) {
1950        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1951              "modified the original buffer.");
1952    }
1953    if(1!=u_strlen(alias.getTerminatedBuffer())) {
1954        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1955              "does not return a buffer terminated at the proper length.");
1956    }
1957
1958    alias.setTo(TRUE, uchars, 2);
1959    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1960        errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1961        return;
1962    }
1963    alias.remove();
1964    if(alias.length()!=0) {
1965        errln("UnicodeString(read-only-alias).remove() did not work.");
1966    }
1967    if(alias.getTerminatedBuffer()==uchars) {
1968        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1969              "did not un-alias as expected.");
1970    }
1971    if(uchars[0]!=0x61) {
1972        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1973              "modified the original buffer.");
1974    }
1975    if(0!=u_strlen(alias.getTerminatedBuffer())) {
1976        errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
1977              "does not return a buffer terminated at length 0.");
1978    }
1979
1980    UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
1981    alias.setTo(FALSE, longString.getBuffer(), longString.length());
1982    alias.remove(0, 10);
1983    if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
1984        errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
1985    }
1986    alias.setTo(FALSE, longString.getBuffer(), longString.length());
1987    alias.remove(27, 99);
1988    if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
1989        errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
1990    }
1991    alias.setTo(FALSE, longString.getBuffer(), longString.length());
1992    alias.retainBetween(6, 30);
1993    if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
1994        errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
1995    }
1996
1997    UChar abc[]={ 0x61, 0x62, 0x63, 0 };
1998    UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
1999
2000    UnicodeString temp;
2001    temp.fastCopyFrom(longString.tempSubString());
2002    if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2003        errln("UnicodeString.tempSubString() failed");
2004    }
2005    temp.fastCopyFrom(longString.tempSubString(-3, 5));
2006    if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2007        errln("UnicodeString.tempSubString(-3, 5) failed");
2008    }
2009    temp.fastCopyFrom(longString.tempSubString(17));
2010    if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2011        errln("UnicodeString.tempSubString(17) failed");
2012    }
2013    temp.fastCopyFrom(longString.tempSubString(99));
2014    if(!temp.isEmpty()) {
2015        errln("UnicodeString.tempSubString(99) failed");
2016    }
2017    temp.fastCopyFrom(longString.tempSubStringBetween(6));
2018    if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2019        errln("UnicodeString.tempSubStringBetween(6) failed");
2020    }
2021    temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2022    if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2023        errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2024    }
2025    UnicodeString bogusString;
2026    bogusString.setToBogus();
2027    temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2028    if(!temp.isBogus()) {
2029        errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2030    }
2031}
2032
2033void
2034UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2035    static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2036    static const UChar fg[3]={ 0x66, 0x67, 0 };
2037    if(!app.reserveAppendCapacity(12)) {
2038        errln("Appendable.reserve(12) failed");
2039    }
2040    app.appendCodeUnit(0x61);
2041    app.appendCodePoint(0x62);
2042    app.appendCodePoint(0x50000);
2043    app.appendString(cde, 3);
2044    app.appendString(fg, -1);
2045    UChar scratch[3];
2046    int32_t capacity=-1;
2047    UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2048    if(capacity<3) {
2049        errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2050        return;
2051    }
2052    static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2053    u_memcpy(buffer, hij, 3);
2054    app.appendString(buffer, 3);
2055    if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2056        errln("Appendable.append(...) failed");
2057    }
2058    buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2059    if(buffer!=NULL || capacity!=0) {
2060        errln("Appendable.getAppendBuffer(min=0) failed");
2061    }
2062    capacity=1;
2063    buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2064    if(buffer!=NULL || capacity!=0) {
2065        errln("Appendable.getAppendBuffer(scratch<min) failed");
2066    }
2067}
2068
2069class SimpleAppendable : public Appendable {
2070public:
2071    explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
2072    virtual UBool appendCodeUnit(UChar c) { str.append(c); return TRUE; }
2073    SimpleAppendable &reset() { str.remove(); return *this; }
2074private:
2075    UnicodeString &str;
2076};
2077
2078void
2079UnicodeStringTest::TestAppendable() {
2080    UnicodeString dest;
2081    SimpleAppendable app(dest);
2082    doTestAppendable(dest, app);
2083}
2084
2085void
2086UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2087    UnicodeString dest;
2088    UnicodeStringAppendable app(dest);
2089    doTestAppendable(dest, app);
2090}
2091
2092void
2093UnicodeStringTest::TestSizeofUnicodeString() {
2094    // See the comments in unistr.h near the declaration of UnicodeString's fields.
2095    // See the API comments for UNISTR_OBJECT_SIZE.
2096    size_t sizeofUniStr=sizeof(UnicodeString);
2097    size_t expected=UNISTR_OBJECT_SIZE;
2098    if(expected!=sizeofUniStr) {
2099        // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2100        // of the compiler might add more internal padding than expected.
2101        errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2102              (int)sizeofUniStr, (int)expected);
2103    }
2104    if(sizeofUniStr<32) {
2105        errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2106    }
2107    // We assume that the entire UnicodeString object,
2108    // minus the vtable pointer and 2 bytes for flags and short length,
2109    // is available for internal storage of UChars.
2110    int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2111    UnicodeString s;
2112    const UChar *emptyBuffer=s.getBuffer();
2113    for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2114        s.append((UChar)0x2e);
2115    }
2116    const UChar *fullBuffer=s.getBuffer();
2117    if(fullBuffer!=emptyBuffer) {
2118        errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2119              expectedStackBufferLength);
2120    }
2121    const UChar *terminatedBuffer=s.getTerminatedBuffer();
2122    if(terminatedBuffer==emptyBuffer) {
2123        errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2124              expectedStackBufferLength);
2125    }
2126}
2127
2128void
2129UnicodeStringTest::TestMoveSwap() {
2130    static const UChar abc[3] = { 0x61, 0x62, 0x63 };  // "abc"
2131    UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc));  // read-only alias
2132    UnicodeString s2(100, 0x7a, 100);  // 100 * 'z' should be on the heap
2133    UnicodeString s3("defg", 4, US_INV);  // in stack buffer
2134    const UChar *p = s2.getBuffer();
2135    s1.swap(s2);
2136    if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2137        errln("UnicodeString.swap() did not swap");
2138    }
2139    swap(s2, s3);
2140    if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2141        errln("swap(UnicodeString) did not swap back");
2142    }
2143    UnicodeString s4;
2144    s4.moveFrom(s1);
2145    if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2146        errln("UnicodeString.moveFrom(heap) did not move");
2147    }
2148    UnicodeString s5;
2149    s5.moveFrom(s2);
2150    if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2151        errln("UnicodeString.moveFrom(stack) did not move");
2152    }
2153    UnicodeString s6;
2154    s6.moveFrom(s3);
2155    if(s6.getBuffer() != abc || s6.length() != 3) {
2156        errln("UnicodeString.moveFrom(alias) did not move");
2157    }
2158#if U_HAVE_RVALUE_REFERENCES
2159    infoln("TestMoveSwap() with rvalue references");
2160    s1 = static_cast<UnicodeString &&>(s6);
2161    if(s1.getBuffer() != abc || s1.length() != 3) {
2162        errln("UnicodeString move assignment operator did not move");
2163    }
2164    UnicodeString s7(static_cast<UnicodeString &&>(s4));
2165    if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2166        errln("UnicodeString move constructor did not move");
2167    }
2168#else
2169    infoln("TestMoveSwap() without rvalue references");
2170    UnicodeString s7;
2171#endif
2172
2173    // Move self assignment leaves the object valid but in an undefined state.
2174    // Do it to make sure there is no crash,
2175    // but do not check for any particular resulting value.
2176    s1.moveFrom(s1);
2177    s2.moveFrom(s2);
2178    s3.moveFrom(s3);
2179    s4.moveFrom(s4);
2180    s5.moveFrom(s5);
2181    s6.moveFrom(s6);
2182    s7.moveFrom(s7);
2183    // Simple copy assignment must work.
2184    UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2185    s1 = s6 = s4 = s7 = simple;
2186    if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2187        errln("UnicodeString copy after self-move did not work");
2188    }
2189}
2190