1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7#include "ustrtest.h"
8#include "unicode/std_string.h"
9#include "unicode/unistr.h"
10#include "unicode/uchar.h"
11#include "unicode/ustring.h"
12#include "unicode/locid.h"
13#include "unicode/ucnv.h"
14#include "unicode/uenum.h"
15#include "cmemory.h"
16#include "charstr.h"
17
18#if 0
19#include "unicode/ustream.h"
20
21#if U_IOSTREAM_SOURCE >= 199711
22#include <iostream>
23using namespace std;
24#elif U_IOSTREAM_SOURCE >= 198506
25#include <iostream.h>
26#endif
27
28#endif
29
// Number of elements in a true array (not a pointer), as an int32_t.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
31
32UnicodeStringTest::~UnicodeStringTest() {}
33
34void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
35{
36    if (exec) logln("TestSuite UnicodeStringTest: ");
37    switch (index) {
38        case 0:
39            name = "StringCaseTest";
40            if (exec) {
41                logln("StringCaseTest---"); logln("");
42                StringCaseTest test;
43                callTest(test, par);
44            }
45            break;
46        case 1: name = "TestBasicManipulation"; if (exec) TestBasicManipulation(); break;
47        case 2: name = "TestCompare"; if (exec) TestCompare(); break;
48        case 3: name = "TestExtract"; if (exec) TestExtract(); break;
49        case 4: name = "TestRemoveReplace"; if (exec) TestRemoveReplace(); break;
50        case 5: name = "TestSearching"; if (exec) TestSearching(); break;
51        case 6: name = "TestSpacePadding"; if (exec) TestSpacePadding(); break;
52        case 7: name = "TestPrefixAndSuffix"; if (exec) TestPrefixAndSuffix(); break;
53        case 8: name = "TestFindAndReplace"; if (exec) TestFindAndReplace(); break;
54        case 9: name = "TestBogus"; if (exec) TestBogus(); break;
55        case 10: name = "TestReverse"; if (exec) TestReverse(); break;
56        case 11: name = "TestMiscellaneous"; if (exec) TestMiscellaneous(); break;
57        case 12: name = "TestStackAllocation"; if (exec) TestStackAllocation(); break;
58        case 13: name = "TestUnescape"; if (exec) TestUnescape(); break;
59        case 14: name = "TestCountChar32"; if (exec) TestCountChar32(); break;
60        case 15: name = "TestStringEnumeration"; if (exec) TestStringEnumeration(); break;
61        case 16: name = "TestNameSpace"; if (exec) TestNameSpace(); break;
62        case 17: name = "TestUTF32"; if (exec) TestUTF32(); break;
63        case 18: name = "TestUTF8"; if (exec) TestUTF8(); break;
64        case 19: name = "TestReadOnlyAlias"; if (exec) TestReadOnlyAlias(); break;
65
66        default: name = ""; break; //needed to end loop
67    }
68}
69
70void
71UnicodeStringTest::TestBasicManipulation()
72{
73    UnicodeString   test1("Now is the time for all men to come swiftly to the aid of the party.\n");
74    UnicodeString   expectedValue;
75    UnicodeString   *c;
76
77    c=(UnicodeString *)test1.clone();
78    test1.insert(24, "good ");
79    expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
80    if (test1 != expectedValue)
81        errln("insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
82
83    c->insert(24, "good ");
84    if(*c != expectedValue) {
85        errln("clone()->insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
86    }
87    delete c;
88
89    test1.remove(41, 8);
90    expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
91    if (test1 != expectedValue)
92        errln("remove() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
93
94    test1.replace(58, 6, "ir country");
95    expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
96    if (test1 != expectedValue)
97        errln("replace() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
98
99    UChar     temp[80];
100    test1.extract(0, 15, temp);
101
102    UnicodeString       test2(temp, 15);
103
104    expectedValue = "Now is the time";
105    if (test2 != expectedValue)
106        errln("extract() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
107
108    test2 += " for me to go!\n";
109    expectedValue = "Now is the time for me to go!\n";
110    if (test2 != expectedValue)
111        errln("operator+=() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
112
113    if (test1.length() != 70)
114        errln("length() failed: expected 70, got " + test1.length());
115    if (test2.length() != 30)
116        errln("length() failed: expected 30, got " + test2.length());
117
118    UnicodeString test3;
119    test3.append((UChar32)0x20402);
120    if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
121        errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
122    }
123    if(test3.length() != 2){
124        errln("append or length failed for UChar32, expected 2, got " + test3.length());
125    }
126    test3.append((UChar32)0x0074);
127    if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
128        errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
129    }
130    if(test3.length() != 3){
131        errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
132    }
133
134    // test some UChar32 overloads
135    if( test3.setTo((UChar32)0x10330).length() != 2 ||
136        test3.insert(0, (UChar32)0x20100).length() != 4 ||
137        test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
138        (test3 = (UChar32)0x14001).length() != 2
139    ) {
140        errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
141    }
142
143    {
144        // test moveIndex32()
145        UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
146
147        if(
148            s.moveIndex32(2, -1)!=0 ||
149            s.moveIndex32(2, 1)!=4 ||
150            s.moveIndex32(2, 2)!=5 ||
151            s.moveIndex32(5, -2)!=2 ||
152            s.moveIndex32(0, -1)!=0 ||
153            s.moveIndex32(6, 1)!=6
154        ) {
155            errln("UnicodeString::moveIndex32() failed");
156        }
157
158        if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
159            errln("UnicodeString::getChar32Start() failed");
160        }
161
162        if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
163            errln("UnicodeString::getChar32Limit() failed");
164        }
165    }
166
167    {
168        // test new 2.2 constructors and setTo function that parallel Java's substring function.
169        UnicodeString src("Hello folks how are you?");
170        UnicodeString target1("how are you?");
171        if (target1 != UnicodeString(src, 12)) {
172            errln("UnicodeString(const UnicodeString&, int32_t) failed");
173        }
174        UnicodeString target2("folks");
175        if (target2 != UnicodeString(src, 6, 5)) {
176            errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
177        }
178        if (target1 != target2.setTo(src, 12)) {
179            errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
180        }
181    }
182
183    {
184        // op+ is new in ICU 2.8
185        UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
186        if(s!=UnicodeString("abcdefghi", "")) {
187            errln("operator+(UniStr, UniStr) failed");
188        }
189    }
190
191    {
192        // tests for Jitterbug 2360
193        // verify that APIs with source pointer + length accept length == -1
194        // mostly test only where modified, only few functions did not already do this
195        if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
196            errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
197        }
198
199        UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
200        UnicodeString s, t(buffer, -1, LENGTHOF(buffer));
201
202        if(s.setTo(buffer, -1, LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
203            errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
204        }
205        if(t.length()!=u_strlen(buffer)) {
206            errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
207        }
208
209        if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
210            errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
211        }
212        if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
213            errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
214        }
215
216        buffer[u_strlen(buffer)]=0xe4;
217        UnicodeString u(buffer, -1, LENGTHOF(buffer));
218        if(s.setTo(buffer, -1, LENGTHOF(buffer)).length()!=LENGTHOF(buffer)) {
219            errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
220        }
221        if(u.length()!=LENGTHOF(buffer)) {
222            errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
223        }
224
225        static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
226        UConverter *cnv;
227        UErrorCode errorCode=U_ZERO_ERROR;
228
229        cnv=ucnv_open("ISO-8859-1", &errorCode);
230        UnicodeString v(cs, -1, cnv, errorCode);
231        ucnv_close(cnv);
232        if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
233            errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
234        }
235    }
236
237#if U_CHARSET_IS_UTF8
238    {
239        // Test the hardcoded-UTF-8 UnicodeString optimizations.
240        static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
241        static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
242        UnicodeString from8a = UnicodeString((const char *)utf8);
243        UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
244        UnicodeString from16(FALSE, utf16, LENGTHOF(utf16));
245        if(from8a != from16 || from8b != from16) {
246            errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
247        }
248        char buffer[16];
249        int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
250        if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
251            errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
252        }
253        length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
254        if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
255            errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
256        }
257    }
258#endif
259}
260
261void
262UnicodeStringTest::TestCompare()
263{
264    UnicodeString   test1("this is a test");
265    UnicodeString   test2("this is a test");
266    UnicodeString   test3("this is a test of the emergency broadcast system");
267    UnicodeString   test4("never say, \"this is a test\"!!");
268
269    UnicodeString   test5((UChar)0x5000);
270    UnicodeString   test6((UChar)0x5100);
271
272    UChar         uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
273                 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
274    char            chars[] = "this is a test";
275
276    // test operator== and operator!=
277    if (test1 != test2 || test1 == test3 || test1 == test4)
278        errln("operator== or operator!= failed");
279
280    // test operator> and operator<
281    if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
282        !(test5 < test6)
283    ) {
284        errln("operator> or operator< failed");
285    }
286
287    // test operator>= and operator<=
288    if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
289        errln("operator>= or operator<= failed");
290
291    // test compare(UnicodeString)
292    if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
293        errln("compare(UnicodeString) failed");
294
295    //test compare(offset, length, UnicodeString)
296    if(test1.compare(0, 14, test2) != 0 ||
297        test3.compare(0, 14, test2) != 0 ||
298        test4.compare(12, 14, test2) != 0 ||
299        test3.compare(0, 18, test1) <=0  )
300        errln("compare(offset, length, UnicodeString) failes");
301
302    // test compare(UChar*)
303    if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
304        errln("compare(UChar*) failed");
305
306    // test compare(char*)
307    if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
308        errln("compare(char*) failed");
309
310    // test compare(UChar*, length)
311    if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
312        errln("compare(UChar*, length) failed");
313
314    // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
315    if (test1.compare(0, 14, test2, 0, 14) != 0
316    || test1.compare(0, 14, test3, 0, 14) != 0
317    || test1.compare(0, 14, test4, 12, 14) != 0)
318        errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
319
320    if (test1.compare(10, 4, test2, 0, 4) >= 0
321    || test1.compare(10, 4, test3, 22, 9) <= 0
322    || test1.compare(10, 4, test4, 22, 4) != 0)
323        errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
324
325    // test compareBetween
326    if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
327                    || test1.compareBetween(0, 14, test4, 12, 26) != 0)
328        errln("compareBetween failed");
329
330    if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
331                    || test1.compareBetween(10, 14, test4, 22, 26) != 0)
332        errln("compareBetween failed");
333
334    // test compare() etc. with strings that share a buffer but are not equal
335    test2=test1; // share the buffer, length() too large for the stackBuffer
336    test2.truncate(1); // change only the length, not the buffer
337    if( test1==test2 || test1<=test2 ||
338        test1.compare(test2)<=0 ||
339        test1.compareCodePointOrder(test2)<=0 ||
340        test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
341        test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
342        test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
343        test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
344    ) {
345        errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
346    }
347
348    /* test compareCodePointOrder() */
349    {
350        /* these strings are in ascending order */
351        static const UChar strings[][4]={
352            { 0x61, 0 },                    /* U+0061 */
353            { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
354            { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
355            { 0xd800, 0 },                  /* U+d800 */
356            { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
357            { 0xdfff, 0 },                  /* U+dfff */
358            { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
359            { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
360            { 0xd800, 0xdc02, 0 },          /* U+10002 */
361            { 0xd84d, 0xdc56, 0 }           /* U+23456 */
362        };
363        UnicodeString u[20]; // must be at least as long as strings[]
364        int32_t i;
365
366        for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])); ++i) {
367            u[i]=UnicodeString(TRUE, strings[i], -1);
368        }
369
370        for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])-1); ++i) {
371            if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
372                errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
373            }
374        }
375    }
376
377    /* test caseCompare() */
378    {
379        static const UChar
380        _mixed[]=               { 0x61, 0x42, 0x131, 0x3a3, 0xdf,       0x130,       0x49,  0xfb03,           0xd93f, 0xdfff, 0 },
381        _otherDefault[]=        { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69,  0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
382        _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69,        0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
383        _different[]=           { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130,       0x49,  0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
384
385        UnicodeString
386            mixed(TRUE, _mixed, -1),
387            otherDefault(TRUE, _otherDefault, -1),
388            otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
389            different(TRUE, _different, -1);
390
391        int8_t result;
392
393        /* test caseCompare() */
394        result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
395        if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
396            errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
397        }
398        result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
399        if(result!=0) {
400            errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
401        }
402        result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
403        if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
404            errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
405        }
406
407        /* test caseCompare() */
408        result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
409        if(result<=0) {
410            errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
411        }
412
413        /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
414        result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
415        if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
416            errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
417        }
418
419        /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
420        result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
421        if(result<=0) {
422            errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
423        }
424    }
425
426    // test that srcLength=-1 is handled in functions that
427    // take input const UChar */int32_t srcLength (j785)
428    {
429        static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
430        UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
431
432        if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
433            errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
434        }
435
436        if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
437            errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
438        }
439
440        if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
441            errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
442        }
443
444        if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
445            errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
446        }
447
448        if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
449            errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
450        }
451
452        UnicodeString s2, s3;
453        s2.replace(0, 0, u+1, -1);
454        s3.replace(0, 0, u, 1, -1);
455        if(s.compare(1, 999, s2)!=0 || s2!=s3) {
456            errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
457        }
458    }
459}
460
461void
462UnicodeStringTest::TestExtract()
463{
464    UnicodeString  test1("Now is the time for all good men to come to the aid of their country.", "");
465    UnicodeString  test2;
466    UChar          test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
467    char           test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
468    UnicodeString  test5;
469    char           test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
470
471    test1.extract(11, 12, test2);
472    test1.extract(11, 12, test3);
473    if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
474        errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
475    }
476
477    // test proper pinning in extractBetween()
478    test1.extractBetween(-3, 7, test5);
479    if(test5!=UNICODE_STRING("Now is ", 7)) {
480        errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
481    }
482
483    test1.extractBetween(11, 23, test5);
484    if (test1.extract(60, 71, test6) != 9) {
485        errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
486    }
487    if (test1.extract(11, 12, test6) != 12) {
488        errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
489    }
490
491    // convert test4 back to Unicode for comparison
492    UnicodeString test4b(test4, 12);
493
494    if (test1.extract(11, 12, (char *)NULL) != 12) {
495        errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
496    }
497    if (test1.extract(11, -1, test6) != 0) {
498        errln("UnicodeString.extract(-1) failed to stop reading the string.");
499    }
500
501    for (int32_t i = 0; i < 12; i++) {
502        if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
503            errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
504            break;
505        }
506        if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
507            errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
508            break;
509        }
510        if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
511            errln(UnicodeString("extracting into an array of char failed at position ") + i);
512            break;
513        }
514        if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
515            errln(UnicodeString("extracting with extractBetween failed at position ") + i);
516            break;
517        }
518    }
519
520    // test preflighting and overflows with invariant conversion
521    if (test1.extract(0, 10, (char *)NULL, "") != 10) {
522        errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
523    }
524
525    test4[2] = (char)0xff;
526    if (test1.extract(0, 10, test4, 2, "") != 10) {
527        errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
528    }
529    if (test4[2] != (char)0xff) {
530        errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
531    }
532
533    {
534        // test new, NUL-terminating extract() function
535        UnicodeString s("terminate", "");
536        UChar dest[20]={
537            0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
538            0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
539        };
540        UErrorCode errorCode;
541        int32_t length;
542
543        errorCode=U_ZERO_ERROR;
544        length=s.extract((UChar *)NULL, 0, errorCode);
545        if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
546            errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
547        }
548
549        errorCode=U_ZERO_ERROR;
550        length=s.extract(dest, s.length()-1, errorCode);
551        if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
552            errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
553                length, u_errorName(errorCode), s.length());
554        }
555
556        errorCode=U_ZERO_ERROR;
557        length=s.extract(dest, s.length(), errorCode);
558        if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
559            errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
560                length, u_errorName(errorCode), s.length());
561        }
562        if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
563            errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
564        }
565
566        errorCode=U_ZERO_ERROR;
567        length=s.extract(dest, s.length()+1, errorCode);
568        if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
569            errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
570                length, u_errorName(errorCode), s.length());
571        }
572        if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
573            errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
574        }
575    }
576
577    {
578        // test new UConverter extract() and constructor
579        UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
580        char buffer[32];
581        static const char expect[]={
582            (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
583            (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
584            (char)0xc3, (char)0x84,
585            (char)0xe1, (char)0xbb, (char)0x90
586        };
587        UErrorCode errorCode=U_ZERO_ERROR;
588        UConverter *cnv=ucnv_open("UTF-8", &errorCode);
589        int32_t length;
590
591        if(U_SUCCESS(errorCode)) {
592            // test preflighting
593            if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
594                errorCode!=U_BUFFER_OVERFLOW_ERROR
595            ) {
596                errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
597                      length, u_errorName(errorCode));
598            }
599            errorCode=U_ZERO_ERROR;
600            if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
601                errorCode!=U_BUFFER_OVERFLOW_ERROR
602            ) {
603                errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
604                      length, u_errorName(errorCode));
605            }
606
607            // try error cases
608            errorCode=U_ZERO_ERROR;
609            if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
610                errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
611            }
612            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
613            if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
614                errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
615            }
616            errorCode=U_ZERO_ERROR;
617
618            // extract for real
619            if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
620                uprv_memcmp(buffer, expect, 13)!=0 ||
621                buffer[13]!=0 ||
622                U_FAILURE(errorCode)
623            ) {
624                errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
625                      length, u_errorName(errorCode));
626            }
627            // Test again with just the converter name.
628            if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
629                uprv_memcmp(buffer, expect, 13)!=0 ||
630                buffer[13]!=0 ||
631                U_FAILURE(errorCode)
632            ) {
633                errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
634                      length, u_errorName(errorCode));
635            }
636
637            // try the constructor
638            UnicodeString t(expect, sizeof(expect), cnv, errorCode);
639            if(U_FAILURE(errorCode) || s!=t) {
640                errln("UnicodeString(UConverter) conversion failed (%s)",
641                      u_errorName(errorCode));
642            }
643
644            ucnv_close(cnv);
645        }
646    }
647}
648
649void
650UnicodeStringTest::TestRemoveReplace()
651{
652    UnicodeString   test1("The rain in Spain stays mainly on the plain");
653    UnicodeString   test2("eat SPAMburgers!");
654    UChar         test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
655    char            test4[] = "SPAM";
656    UnicodeString&  test5 = test1;
657
658    test1.replace(4, 4, test2, 4, 4);
659    test1.replace(12, 5, test3, 4);
660    test3[4] = 0;
661    test1.replace(17, 4, test3);
662    test1.replace(23, 4, test4);
663    test1.replaceBetween(37, 42, test2, 4, 8);
664
665    if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
666        errln("One of the replace methods failed:\n"
667              "  expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
668              "  got \"" + test1 + "\"");
669
670    test1.remove(21, 1);
671    test1.removeBetween(26, 28);
672
673    if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
674        errln("One of the remove methods failed:\n"
675              "  expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
676              "  got \"" + test1 + "\"");
677
678    for (int32_t i = 0; i < test1.length(); i++) {
679        if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
680            test1.setCharAt(i, 0x78);
681        }
682    }
683
684    if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
685        errln("One of the remove methods failed:\n"
686              "  expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
687              "  got \"" + test1 + "\"");
688
689    test1.remove();
690    if (test1.length() != 0)
691        errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
692}
693
694void
695UnicodeStringTest::TestSearching()
696{
697    UnicodeString test1("test test ttest tetest testesteststt");
698    UnicodeString test2("test");
699    UChar testChar = 0x74;
700
701    UChar32 testChar32 = 0x20402;
702    UChar testData[]={
703        //   0       1       2       3       4       5       6       7
704        0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
705
706        //   8       9      10      11      12      13      14      15
707        0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
708
709        //  16      17      18      19
710        0xdc02, 0xd841, 0x0073, 0x0000
711    };
712    UnicodeString test3(testData);
713    UnicodeString test4(testChar32);
714
715    uint16_t occurrences = 0;
716    int32_t startPos = 0;
717    for ( ;
718          startPos != -1 && startPos < test1.length();
719          (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
720        ;
721    if (occurrences != 6)
722        errln("indexOf failed: expected to find 6 occurrences, found " + occurrences);
723
724    for ( occurrences = 0, startPos = 10;
725          startPos != -1 && startPos < test1.length();
726          (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
727        ;
728    if (occurrences != 4)
729        errln("indexOf with starting offset failed: expected to find 4 occurrences, found " + occurrences);
730
731    int32_t endPos = 28;
732    for ( occurrences = 0, startPos = 5;
733          startPos != -1 && startPos < test1.length();
734          (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
735        ;
736    if (occurrences != 4)
737        errln("indexOf with starting and ending offsets failed: expected to find 4 occurrences, found " + occurrences);
738
739    //using UChar32 string
740    for ( startPos=0, occurrences=0;
741          startPos != -1 && startPos < test3.length();
742          (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
743        ;
744    if (occurrences != 4)
745        errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
746
747    for ( startPos=10, occurrences=0;
748          startPos != -1 && startPos < test3.length();
749          (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
750        ;
751    if (occurrences != 2)
752        errln("indexOf failed: expected to find 2 occurrences, found " + occurrences);
753    //---
754
755    for ( occurrences = 0, startPos = 0;
756          startPos != -1 && startPos < test1.length();
757          (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
758        ;
759    if (occurrences != 16)
760        errln("indexOf with character failed: expected to find 16 occurrences, found " + occurrences);
761
762    for ( occurrences = 0, startPos = 10;
763          startPos != -1 && startPos < test1.length();
764          (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
765        ;
766    if (occurrences != 12)
767        errln("indexOf with character & start offset failed: expected to find 12 occurrences, found " + occurrences);
768
769    for ( occurrences = 0, startPos = 5, endPos = 28;
770          startPos != -1 && startPos < test1.length();
771          (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
772        ;
773    if (occurrences != 10)
774        errln("indexOf with character & start & end offsets failed: expected to find 10 occurrences, found " + occurrences);
775
776    //testing for UChar32
777    UnicodeString subString;
778    for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
779        subString.append(test3, startPos, test3.length());
780        if(subString.indexOf(testChar32) != -1 ){
781             ++occurrences;
782        }
783        subString.remove();
784    }
785    if (occurrences != 14)
786        errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
787
788    for ( occurrences = 0, startPos = 0;
789          startPos != -1 && startPos < test3.length();
790          (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
791        ;
792    if (occurrences != 4)
793        errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
794
795    endPos=test3.length();
796    for ( occurrences = 0, startPos = 5;
797          startPos != -1 && startPos < test3.length();
798          (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
799        ;
800    if (occurrences != 3)
801        errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
802    //---
803
804    if(test1.lastIndexOf(test2)!=29) {
805        errln("test1.lastIndexOf(test2)!=29");
806    }
807
808    if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
809        errln("test1.lastIndexOf(test2, start) failed");
810    }
811
812    for ( occurrences = 0, startPos = 32;
813          startPos != -1;
814          (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
815        ;
816    if (occurrences != 4)
817        errln("lastIndexOf with starting and ending offsets failed: expected to find 4 occurrences, found " + occurrences);
818
819    for ( occurrences = 0, startPos = 32;
820          startPos != -1;
821          (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
822        ;
823    if (occurrences != 11)
824        errln("lastIndexOf with character & start & end offsets failed: expected to find 11 occurrences, found " + occurrences);
825
826    //testing UChar32
827    startPos=test3.length();
828    for ( occurrences = 0;
829          startPos != -1;
830          (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
831        ;
832    if (occurrences != 3)
833        errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
834
835
836    for ( occurrences = 0, endPos = test3.length();  endPos > 0; endPos -= 1){
837        subString.remove();
838        subString.append(test3, 0, endPos);
839        if(subString.lastIndexOf(testChar32) != -1 ){
840            ++occurrences;
841        }
842    }
843    if (occurrences != 18)
844        errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
845    //---
846
847    // test that indexOf(UChar32) and lastIndexOf(UChar32)
848    // do not find surrogate code points when they are part of matched pairs
849    // (= part of supplementary code points)
850    // Jitterbug 1542
851    if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
852        errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
853    }
854    if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
855        UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
856        test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
857    ) {
858        errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
859    }
860}
861
862void
863UnicodeStringTest::TestSpacePadding()
864{
865    UnicodeString test1("hello");
866    UnicodeString test2("   there");
867    UnicodeString test3("Hi!  How ya doin'?  Beautiful day, isn't it?");
868    UnicodeString test4;
869    UBool returnVal;
870    UnicodeString expectedValue;
871
872    returnVal = test1.padLeading(15);
873    expectedValue = "          hello";
874    if (returnVal == FALSE || test1 != expectedValue)
875        errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
876
877    returnVal = test2.padTrailing(15);
878    expectedValue = "   there       ";
879    if (returnVal == FALSE || test2 != expectedValue)
880        errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
881
882    expectedValue = test3;
883    returnVal = test3.padTrailing(15);
884    if (returnVal == TRUE || test3 != expectedValue)
885        errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
886
887    expectedValue = "hello";
888    test4.setTo(test1).trim();
889
890    if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
891        errln("trim(UnicodeString&) failed");
892
893    test1.trim();
894    if (test1 != expectedValue)
895        errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
896
897    test2.trim();
898    expectedValue = "there";
899    if (test2 != expectedValue)
900        errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
901
902    test3.trim();
903    expectedValue = "Hi!  How ya doin'?  Beautiful day, isn't it?";
904    if (test3 != expectedValue)
905        errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
906
907    returnVal = test1.truncate(15);
908    expectedValue = "hello";
909    if (returnVal == TRUE || test1 != expectedValue)
910        errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
911
912    returnVal = test2.truncate(15);
913    expectedValue = "there";
914    if (returnVal == TRUE || test2 != expectedValue)
915        errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
916
917    returnVal = test3.truncate(15);
918    expectedValue = "Hi!  How ya doi";
919    if (returnVal == FALSE || test3 != expectedValue)
920        errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
921}
922
923void
924UnicodeStringTest::TestPrefixAndSuffix()
925{
926    UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
927    UnicodeString test2("Now");
928    UnicodeString test3("country.");
929    UnicodeString test4("count");
930
931    if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
932        errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
933    }
934
935    if (test1.startsWith(test3) ||
936        test1.startsWith(test3.getBuffer(), test3.length()) ||
937        test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
938    ) {
939        errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
940    }
941
942    if (test1.endsWith(test2)) {
943        errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
944    }
945
946    if (!test1.endsWith(test3)) {
947        errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
948    }
949    if (!test1.endsWith(test3, 0, INT32_MAX)) {
950        errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
951    }
952
953    if(!test1.endsWith(test3.getBuffer(), test3.length())) {
954        errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
955    }
956    if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
957        errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
958    }
959
960    if (!test3.startsWith(test4)) {
961        errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
962    }
963
964    if (test4.startsWith(test3)) {
965        errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
966    }
967}
968
969void
970UnicodeStringTest::TestFindAndReplace()
971{
972    UnicodeString test1("One potato, two potato, three potato, four\n");
973    UnicodeString test2("potato");
974    UnicodeString test3("MISSISSIPPI");
975
976    UnicodeString expectedValue;
977
978    test1.findAndReplace(test2, test3);
979    expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
980    if (test1 != expectedValue)
981        errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
982    test1.findAndReplace(2, 32, test3, test2);
983    expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
984    if (test1 != expectedValue)
985        errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
986}
987
988void
989UnicodeStringTest::TestReverse()
990{
991    UnicodeString test("backwards words say to used I");
992
993    test.reverse();
994    test.reverse(2, 4);
995    test.reverse(7, 2);
996    test.reverse(10, 3);
997    test.reverse(14, 5);
998    test.reverse(20, 9);
999
1000    if (test != "I used to say words backwards")
1001        errln("reverse() failed:  Expected \"I used to say words backwards\",\n got \""
1002            + test + "\"");
1003
1004    test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1005    test.reverse();
1006    if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1007        errln("reverse() failed with supplementary characters");
1008    }
1009
1010    // Test case for ticket #8091:
1011    // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1012    // an odd-length string that contains no other lead surrogates.
1013    test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1014    UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1015    test.reverse();
1016    if(test!=expected) {
1017        errln("reverse() failed with only lead surrogate in the middle");
1018    }
1019}
1020
1021void
1022UnicodeStringTest::TestMiscellaneous()
1023{
1024    UnicodeString   test1("This is a test");
1025    UnicodeString   test2("This is a test");
1026    UnicodeString   test3("Me too!");
1027
1028    // test getBuffer(minCapacity) and releaseBuffer()
1029    test1=UnicodeString(); // make sure that it starts with its stackBuffer
1030    UChar *p=test1.getBuffer(20);
1031    if(test1.getCapacity()<20) {
1032        errln("UnicodeString::getBuffer(20).getCapacity()<20");
1033    }
1034
1035    test1.append((UChar)7); // must not be able to modify the string here
1036    test1.setCharAt(3, 7);
1037    test1.reverse();
1038    if( test1.length()!=0 ||
1039        test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1040        test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1041    ) {
1042        errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1043    }
1044
1045    p[0]=1;
1046    p[1]=2;
1047    p[2]=3;
1048    test1.releaseBuffer(3);
1049    test1.append((UChar)4);
1050
1051    if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1052        errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1053    }
1054
1055    // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1056    test1.releaseBuffer(1);
1057    if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1058        errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1059    }
1060
1061    // test getBuffer(const)
1062    const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1063    if( test1.length()!=4 ||
1064        q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1065        r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1066    ) {
1067        errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1068    }
1069
1070    // test releaseBuffer() with a NUL-terminated buffer
1071    test1.getBuffer(20)[2]=0;
1072    test1.releaseBuffer(); // implicit -1
1073    if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1074        errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1075    }
1076
1077    // test releaseBuffer() with a non-NUL-terminated buffer
1078    p=test1.getBuffer(256);
1079    for(int32_t i=0; i<test1.getCapacity(); ++i) {
1080        p[i]=(UChar)1;      // fill the buffer with all non-NUL code units
1081    }
1082    test1.releaseBuffer();  // implicit -1
1083    if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1084        errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1085    }
1086
1087    // test getTerminatedBuffer()
1088    test1=UnicodeString("This is another test.", "");
1089    test2=UnicodeString("This is another test.", "");
1090    q=test1.getTerminatedBuffer();
1091    if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1092        errln("getTerminatedBuffer()[length]!=0");
1093    }
1094
1095    const UChar u[]={ 5, 6, 7, 8, 0 };
1096    test1.setTo(FALSE, u, 3);
1097    q=test1.getTerminatedBuffer();
1098    if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1099        errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1100    }
1101
1102    test1.setTo(TRUE, u, -1);
1103    q=test1.getTerminatedBuffer();
1104    if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1105        errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1106    }
1107
1108    test1=UNICODE_STRING("la", 2);
1109    test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1);
1110    if(test1!=UNICODE_STRING("la lila", 7)) {
1111        errln("UnicodeString::append(const UChar *, start, length) failed");
1112    }
1113
1114    test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX);
1115    if(test1!=UNICODE_STRING("la dudum lila", 13)) {
1116        errln("UnicodeString::insert(start, const UniStr &, start, length) failed");
1117    }
1118
1119    static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1120    test1.insert(9, ucs, -1);
1121    if(test1!=UNICODE_STRING("la dudum hm lila", 16)) {
1122        errln("UnicodeString::insert(start, const UChar *, length) failed");
1123    }
1124
1125    test1.replace(9, 2, (UChar)0x2b);
1126    if(test1!=UNICODE_STRING("la dudum + lila", 15)) {
1127        errln("UnicodeString::replace(start, length, UChar) failed");
1128    }
1129
1130    if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1131        errln("UnicodeString::hasMetaData() returns TRUE");
1132    }
1133
1134    // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1135    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1136    test1.truncate(36);  // ensure length()<getCapacity()
1137    test2=test1;  // share the buffer
1138    test1.truncate(5);
1139    if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1140        errln("UnicodeString(shared buffer).truncate() failed");
1141    }
1142    if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1143        errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1144              "modified another copy of the string!");
1145    }
1146    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1147    test1.truncate(36);  // ensure length()<getCapacity()
1148    test2=test1;  // share the buffer
1149    test1.remove();
1150    if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1151        errln("UnicodeString(shared buffer).remove() failed");
1152    }
1153    if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1154        errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1155              "modified another copy of the string!");
1156    }
1157}
1158
1159void
1160UnicodeStringTest::TestStackAllocation()
1161{
1162    UChar           testString[] ={
1163        0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1164    UChar           guardWord = 0x4DED;
1165    UnicodeString*  test = 0;
1166
1167    test = new  UnicodeString(testString);
1168    if (*test != "This is a crazy test.")
1169        errln("Test string failed to initialize properly.");
1170    if (guardWord != 0x04DED)
1171        errln("Test string initialization overwrote guard word!");
1172
1173    test->insert(8, "only ");
1174    test->remove(15, 6);
1175    if (*test != "This is only a test.")
1176        errln("Manipulation of test string failed to work right.");
1177    if (guardWord != 0x4DED)
1178        errln("Manipulation of test string overwrote guard word!");
1179
1180    // we have to deinitialize and release the backing store by calling the destructor
1181    // explicitly, since we can't overload operator delete
1182    delete test;
1183
1184    UChar workingBuffer[] = {
1185        0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1186        0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1187        0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1188        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1189        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1190    UChar guardWord2 = 0x4DED;
1191
1192    test = new UnicodeString(workingBuffer, 35, 100);
1193    if (*test != "Now is the time for all men to come")
1194        errln("Stack-allocated backing store failed to initialize correctly.");
1195    if (guardWord2 != 0x4DED)
1196        errln("Stack-allocated backing store overwrote guard word!");
1197
1198    test->insert(24, "good ");
1199    if (*test != "Now is the time for all good men to come")
1200        errln("insert() on stack-allocated UnicodeString didn't work right");
1201    if (guardWord2 != 0x4DED)
1202        errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1203
1204    if (workingBuffer[24] != 0x67)
1205        errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1206
1207    *test += " to the aid of their country.";
1208    if (*test != "Now is the time for all good men to come to the aid of their country.")
1209        errln("Stack-allocated UnicodeString overflow didn't work");
1210    if (guardWord2 != 0x4DED)
1211        errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1212
1213    *test = "ha!";
1214    if (*test != "ha!")
1215        errln("Assignment to stack-allocated UnicodeString didn't work");
1216    if (workingBuffer[0] != 0x4e)
1217        errln("Change to UnicodeString after overflow are still affecting original buffer");
1218    if (guardWord2 != 0x4DED)
1219        errln("Change to UnicodeString after overflow overwrote guard word!");
1220
1221    // test read-only aliasing with setTo()
1222    workingBuffer[0] = 0x20ac;
1223    workingBuffer[1] = 0x125;
1224    workingBuffer[2] = 0;
1225    test->setTo(TRUE, workingBuffer, 2);
1226    if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1227        errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1228    }
1229
1230    UnicodeString *c=(UnicodeString *)test->clone();
1231
1232    workingBuffer[1] = 0x109;
1233    if(test->charAt(1) != 0x109) {
1234        errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1235    }
1236
1237    if(c->length() != 2 || c->charAt(1) != 0x125) {
1238        errln("clone(alias) did not copy the buffer");
1239    }
1240    delete c;
1241
1242    test->setTo(TRUE, workingBuffer, -1);
1243    if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1244        errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1245    }
1246
1247    test->setTo(FALSE, workingBuffer, -1);
1248    if(!test->isBogus()) {
1249        errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1250    }
1251
1252    delete test;
1253
1254    test=new UnicodeString();
1255    UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1256    test->setTo(buffer, 4, 10);
1257    if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1258        test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1259        errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1260    }
1261    delete test;
1262
1263
1264    // test the UChar32 constructor
1265    UnicodeString c32Test((UChar32)0x10ff2a);
1266    if( c32Test.length() != UTF_CHAR_LENGTH(0x10ff2a) ||
1267        c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1268    ) {
1269        errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1270    }
1271
1272    // test the (new) capacity constructor
1273    UnicodeString capTest(5, (UChar32)0x2a, 5);
1274    if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x2a) ||
1275        capTest.char32At(0) != 0x2a ||
1276        capTest.char32At(4) != 0x2a
1277    ) {
1278        errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1279    }
1280
1281    capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1282    if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x10ff2a) ||
1283        capTest.char32At(0) != 0x10ff2a ||
1284        capTest.char32At(4) != 0x10ff2a
1285    ) {
1286        errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1287    }
1288
1289    capTest = UnicodeString(5, (UChar32)0, 0);
1290    if(capTest.length() != 0) {
1291        errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1292    }
1293}
1294
1295/**
1296 * Test the unescape() function.
1297 */
1298void UnicodeStringTest::TestUnescape(void) {
1299    UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1300    UnicodeString OUT("abc");
1301    OUT.append((UChar)0x4567);
1302    OUT.append(" ");
1303    OUT.append((UChar)0xA);
1304    OUT.append((UChar)0xD);
1305    OUT.append(" ");
1306    OUT.append((UChar32)0x00101234);
1307    OUT.append("xyz");
1308    OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1309    UnicodeString result = IN.unescape();
1310    if (result != OUT) {
1311        errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1312              prettify(result) + ", expected " +
1313              prettify(OUT));
1314    }
1315
1316    // test that an empty string is returned in case of an error
1317    if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1318        errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1319    }
1320}
1321
1322/* test code point counting functions --------------------------------------- */
1323
1324/* reference implementation of UnicodeString::hasMoreChar32Than() */
1325static int32_t
1326_refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1327    int32_t count=s.countChar32(start, length);
1328    return count>number;
1329}
1330
1331/* compare the real function against the reference */
1332void
1333UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1334    if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1335        errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1336                start, length, number, s.hasMoreChar32Than(start, length, number));
1337    }
1338}
1339
1340void
1341UnicodeStringTest::TestCountChar32(void) {
1342    {
1343        UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1344
1345        // test countChar32()
1346        // note that this also calls and tests u_countChar32(length>=0)
1347        if(
1348            s.countChar32()!=4 ||
1349            s.countChar32(1)!=4 ||
1350            s.countChar32(2)!=3 ||
1351            s.countChar32(2, 3)!=2 ||
1352            s.countChar32(2, 0)!=0
1353        ) {
1354            errln("UnicodeString::countChar32() failed");
1355        }
1356
1357        // NUL-terminate the string buffer and test u_countChar32(length=-1)
1358        const UChar *buffer=s.getTerminatedBuffer();
1359        if(
1360            u_countChar32(buffer, -1)!=4 ||
1361            u_countChar32(buffer+1, -1)!=4 ||
1362            u_countChar32(buffer+2, -1)!=3 ||
1363            u_countChar32(buffer+3, -1)!=3 ||
1364            u_countChar32(buffer+4, -1)!=2 ||
1365            u_countChar32(buffer+5, -1)!=1 ||
1366            u_countChar32(buffer+6, -1)!=0
1367        ) {
1368            errln("u_countChar32(length=-1) failed");
1369        }
1370
1371        // test u_countChar32() with bad input
1372        if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1373            errln("u_countChar32(bad input) failed (returned non-zero counts)");
1374        }
1375    }
1376
1377    /* test data and variables for hasMoreChar32Than() */
1378    static const UChar str[]={
1379        0x61, 0x62, 0xd800, 0xdc00,
1380        0xd801, 0xdc01, 0x63, 0xd802,
1381        0x64, 0xdc03, 0x65, 0x66,
1382        0xd804, 0xdc04, 0xd805, 0xdc05,
1383        0x67
1384    };
1385    UnicodeString string(str, LENGTHOF(str));
1386    int32_t start, length, number;
1387
1388    /* test hasMoreChar32Than() */
1389    for(length=string.length(); length>=0; --length) {
1390        for(start=0; start<=length; ++start) {
1391            for(number=-1; number<=((length-start)+2); ++number) {
1392                _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1393            }
1394        }
1395    }
1396
1397    /* test hasMoreChar32Than() with pinning */
1398    for(start=-1; start<=string.length()+1; ++start) {
1399        for(number=-1; number<=((string.length()-start)+2); ++number) {
1400            _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1401        }
1402    }
1403
1404    /* test hasMoreChar32Than() with a bogus string */
1405    string.setToBogus();
1406    for(length=-1; length<=1; ++length) {
1407        for(start=-1; start<=length; ++start) {
1408            for(number=-1; number<=((length-start)+2); ++number) {
1409                _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1410            }
1411        }
1412    }
1413}
1414
// Checks how UnicodeString behaves in its "bogus" state.
//
// The test walks through, in order:
//  - how a string becomes bogus (illegal setTo() arguments, setToBogus(),
//    assignment from a bogus string),
//  - what accessors and comparisons are expected to return for a bogus string,
//  - modifications that are expected to leave the string bogus,
//  - assignments/setTo()/remove()/truncate(0) that are expected to revive it,
//  - NULL input pointers, which are expected to give empty (not bogus) strings.
//
// The statements are order-dependent: test1 itself is turned bogus near the end
// and is then used as a bogus operand in the final comparisons.
void
UnicodeStringTest::TestBogus() {
    UnicodeString   test1("This is a test");
    UnicodeString   test2("This is a test");
    UnicodeString   test3("Me too!");

    // test isBogus() and setToBogus()
    if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
        errln("A string returned TRUE for isBogus()!");
    }

    // NULL pointers are treated like empty strings
    // use other illegal arguments to make a bogus string
    // (here: a length of -2 for the read-only aliasing setTo())
    test3.setTo(FALSE, test1.getBuffer(), -2);
    if(!test3.isBogus()) {
        errln("A bogus string returned FALSE for isBogus()!");
    }
    // equal contents must hash equally; the bogus string must hash differently from test1
    if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
        errln("hashCode() failed");
    }
    // all buffer accessors are expected to return NULL for a bogus string
    if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
        errln("bogus.getBuffer()!=0");
    }
    // a bogus string must never be found inside a normal string
    if (test1.indexOf(test3) != -1) {
        errln("bogus.indexOf() != -1");
    }
    if (test1.lastIndexOf(test3) != -1) {
        errln("bogus.lastIndexOf() != -1");
    }
    // in both comparison flavors a bogus string is expected to sort before a normal one
    if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
        errln("caseCompare() doesn't work with bogus strings");
    }
    if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
        errln("compareCodePointOrder() doesn't work with bogus strings");
    }

    // verify that non-assignment modifications fail and do not revive a bogus string
    // (test3 is reset to bogus before every single check so the checks are independent)
    test3.setToBogus();
    test3.append((UChar)0x61);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.append('a') worked but must not");
    }

    test3.setToBogus();
    test3.findAndReplace(UnicodeString((UChar)0x61), test2);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.findAndReplace() worked but must not");
    }

    test3.setToBogus();
    test3.trim();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.trim() revived bogus but must not");
    }

    test3.setToBogus();
    test3.remove(1);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.remove(1) revived bogus but must not");
    }

    test3.setToBogus();
    if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
        errln("bogus.setCharAt(0, 'b') worked but must not");
    }

    // truncate(1) is expected to return FALSE and leave the string bogus
    test3.setToBogus();
    if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(1) revived bogus but must not");
    }

    // verify that assignments revive a bogus string
    // (each check first confirms the bogus precondition, then the result of the assignment)
    test3.setToBogus();
    if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
        errln("bogus.operator=() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
        errln("bogus.fastCopyFrom() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(const UChar *, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
        errln("bogus.setTo(UChar) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
        errln("bogus.setTo(UChar32) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(readonly alias) failed");
    }

    // writable alias to another string's buffer: very bad idea, just convenient for this test
    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar *)test1.getBuffer(), test1.length(), test1.getCapacity()).isBogus() || test3!=test1) {
        errln("bogus.setTo(writable alias) failed");
    }

    // verify simple, documented ways to turn a bogus string into an empty one
    test3.setToBogus();
    if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.operator=(UnicodeString()) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(UnicodeString()) failed");
    }

    // unlike remove(1) above, removing everything is expected to yield a valid empty string
    test3.setToBogus();
    if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove() failed");
    }

    test3.setToBogus();
    if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove(0, INT32_MAX) failed");
    }

    // unlike truncate(1) above, truncate(0) is expected to clear the bogus state
    // (it is still expected to return FALSE)
    test3.setToBogus();
    if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(0) failed");
    }

    // setting to an invalid code point (-1) is expected to give an empty, non-bogus string
    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo((UChar32)-1) failed");
    }

    // a valid pointer to pass together with length 0
    static const UChar nul=0;

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(&nul, 0) failed");
    }

    // after all the revivals above, setToBogus() must still work
    test3.setToBogus();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("setToBogus() failed to make a string bogus");
    }

    // assigning a bogus string makes the target bogus too;
    // from here on test1 is bogus as well
    test3.setToBogus();
    if(test1.isBogus() || !(test1=test3).isBogus()) {
        errln("normal=bogus failed to make the left string bogus");
    }

    // test that NULL primitive input string values are treated like
    // empty strings, not errors (bogus)
    // (U+10005 occupies two UChars, so the length must still be 2 after the insert)
    test2.setTo((UChar32)0x10005);
    if(test2.insert(1, NULL, 1).length()!=2) {
        errln("UniStr.insert(...NULL...) should not modify the string but does");
    }

    // constructors given a NULL text pointer
    UErrorCode errorCode=U_ZERO_ERROR;
    UnicodeString
        test4((const UChar *)NULL),
        test5(TRUE, (const UChar *)NULL, 1),
        test6((UChar *)NULL, 5, 5),
        test7((const char *)NULL, 3, NULL, errorCode);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
        errln("a constructor set to bogus for a NULL input string, should be empty");
    }

    // the corresponding setTo() overloads given a NULL text pointer
    test4.setTo(NULL, 3);
    test5.setTo(TRUE, (const UChar *)NULL, 1);
    test6.setTo((UChar *)NULL, 5, 5);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
        errln("a setTo() set to bogus for a NULL input string, should be empty");
    }

    // test that bogus==bogus<any
    // (test1 and test3 are both bogus at this point)
    if(test1!=test3 || test1.compare(test3)!=0) {
        errln("bogus==bogus failed");
    }

    // bogus (test1) must compare less than an empty, non-bogus string (test2)
    test2.remove();
    if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
        errln("bogus<empty failed");
    }
}
1620
1621// StringEnumeration ------------------------------------------------------- ***
1622// most of StringEnumeration is tested elsewhere
1623// this test improves code coverage
1624
// Strings returned, in this order, by TestEnumeration::snext().
// Three one-character strings followed by two long ones.
static const char *const testEnumStrings[] = {
    "a", "b", "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1633
1634class TestEnumeration : public StringEnumeration {
1635public:
1636    TestEnumeration() : i(0) {}
1637
1638    virtual int32_t count(UErrorCode& /*status*/) const {
1639        return LENGTHOF(testEnumStrings);
1640    }
1641
1642    virtual const UnicodeString *snext(UErrorCode &status) {
1643        if(U_SUCCESS(status) && i<LENGTHOF(testEnumStrings)) {
1644            unistr=UnicodeString(testEnumStrings[i++], "");
1645            return &unistr;
1646        }
1647
1648        return NULL;
1649    }
1650
1651    virtual void reset(UErrorCode& /*status*/) {
1652        i=0;
1653    }
1654
1655    static inline UClassID getStaticClassID() {
1656        return (UClassID)&fgClassID;
1657    }
1658    virtual UClassID getDynamicClassID() const {
1659        return getStaticClassID();
1660    }
1661
1662private:
1663    static const char fgClassID;
1664
1665    int32_t i, length;
1666};
1667
1668const char TestEnumeration::fgClassID=0;
1669
1670void
1671UnicodeStringTest::TestStringEnumeration() {
1672    UnicodeString s;
1673    TestEnumeration ten;
1674    int32_t i, length;
1675    UErrorCode status;
1676
1677    const UChar *pu;
1678    const char *pc;
1679
1680    // test the next() default implementation and ensureCharsCapacity()
1681    for(i=0; i<LENGTHOF(testEnumStrings); ++i) {
1682        status=U_ZERO_ERROR;
1683        pc=ten.next(&length, status);
1684        s=UnicodeString(testEnumStrings[i], "");
1685        if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1686            errln("StringEnumeration.next(%d) failed", i);
1687        }
1688    }
1689    status=U_ZERO_ERROR;
1690    if(ten.next(&length, status)!=NULL) {
1691        errln("StringEnumeration.next(done)!=NULL");
1692    }
1693
1694    // test the unext() default implementation
1695    ten.reset(status);
1696    for(i=0; i<LENGTHOF(testEnumStrings); ++i) {
1697        status=U_ZERO_ERROR;
1698        pu=ten.unext(&length, status);
1699        s=UnicodeString(testEnumStrings[i], "");
1700        if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1701            errln("StringEnumeration.unext(%d) failed", i);
1702        }
1703    }
1704    status=U_ZERO_ERROR;
1705    if(ten.unext(&length, status)!=NULL) {
1706        errln("StringEnumeration.unext(done)!=NULL");
1707    }
1708
1709    // test that the default clone() implementation works, and returns NULL
1710    if(ten.clone()!=NULL) {
1711        errln("StringEnumeration.clone()!=NULL");
1712    }
1713
1714    // test that uenum_openFromStringEnumeration() works
1715    // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1716    StringEnumeration *newTen = new TestEnumeration;
1717    status=U_ZERO_ERROR;
1718    UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1719    if (uten==NULL || U_FAILURE(status)) {
1720        errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1721        return;
1722    }
1723
1724    // test  uenum_next()
1725    for(i=0; i<LENGTHOF(testEnumStrings); ++i) {
1726        status=U_ZERO_ERROR;
1727        pc=uenum_next(uten, &length, &status);
1728        if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1729            errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1730        }
1731    }
1732    status=U_ZERO_ERROR;
1733    if(uenum_next(uten, &length, &status)!=NULL) {
1734        errln("File %s, line %d, uenum_next(done)!=NULL");
1735    }
1736
1737    // test the uenum_unext()
1738    uenum_reset(uten, &status);
1739    for(i=0; i<LENGTHOF(testEnumStrings); ++i) {
1740        status=U_ZERO_ERROR;
1741        pu=uenum_unext(uten, &length, &status);
1742        s=UnicodeString(testEnumStrings[i], "");
1743        if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1744            errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1745        }
1746    }
1747    status=U_ZERO_ERROR;
1748    if(uenum_unext(uten, &length, &status)!=NULL) {
1749        errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1750    }
1751
1752    uenum_close(uten);
1753}
1754
1755/*
1756 * Namespace test, to make sure that macros like UNICODE_STRING include the
1757 * namespace qualifier.
1758 *
1759 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1760 */
#if U_HAVE_NAMESPACE
namespace bogus {
    // Decoy class that shares its name with icu::UnicodeString.
    // Its constructors only record an integer; the class exists so that an
    // unqualified "UnicodeString" becomes ambiguous once this namespace is
    // pulled in with a using-directive.
    class UnicodeString {
    public:
        enum EInvariant { kInvariant };

        UnicodeString()
            : value(1) {}

        UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength)
            : value(textLength) {}

        UnicodeString(const char * /*src*/, int32_t length, EInvariant /*inv*/)
            : value(length) {}

    private:
        int32_t value;
    };
}
#endif
1775
1776void
1777UnicodeStringTest::TestNameSpace() {
1778#if U_HAVE_NAMESPACE
1779    // Provoke name collision unless the UnicodeString macros properly
1780    // qualify the icu::UnicodeString class.
1781    using namespace bogus;
1782
1783    // Use all UnicodeString macros from unistr.h.
1784    icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1785    icu::UnicodeString s2=UNICODE_STRING("def", 3);
1786    icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1787
1788    // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1789    icu::UnicodeString s4=s1+s2+s3;
1790    if(s4.length()!=9) {
1791        errln("Something wrong with UnicodeString::operator+().");
1792    }
1793#endif
1794}
1795
1796void
1797UnicodeStringTest::TestUTF32() {
1798    // Input string length US_STACKBUF_SIZE to cause overflow of the
1799    // initially chosen fStackBuffer due to supplementary characters.
1800    static const UChar32 utf32[] = {
1801        0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1802        0x10000, 0x20000, 0xe0000, 0x10ffff
1803    };
1804    static const UChar expected_utf16[] = {
1805        0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1806        0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1807    };
1808    UnicodeString from32 = UnicodeString::fromUTF32(utf32, LENGTHOF(utf32));
1809    UnicodeString expected(FALSE, expected_utf16, LENGTHOF(expected_utf16));
1810    if(from32 != expected) {
1811        errln("UnicodeString::fromUTF32() did not create the expected string.");
1812    }
1813
1814    static const UChar utf16[] = {
1815        0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1816    };
1817    static const UChar32 expected_utf32[] = {
1818        0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1819    };
1820    UChar32 result32[16];
1821    UErrorCode errorCode = U_ZERO_ERROR;
1822    int32_t length32 =
1823        UnicodeString(FALSE, utf16, LENGTHOF(utf16)).
1824        toUTF32(result32, LENGTHOF(result32), errorCode);
1825    if( length32 != LENGTHOF(expected_utf32) ||
1826        0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1827        result32[length32] != 0
1828    ) {
1829        errln("UnicodeString::toUTF32() did not create the expected string.");
1830    }
1831}
1832
1833class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1834public:
1835    TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1836            : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
1837    virtual void Flush() { calledFlush = TRUE; }
1838    UBool calledFlush;
1839};
1840
1841void
1842UnicodeStringTest::TestUTF8() {
1843    static const uint8_t utf8[] = {
1844        // Code points:
1845        // 0x41, 0xd900,
1846        // 0x61, 0xdc00,
1847        // 0x110000, 0x5a,
1848        // 0x50000, 0x7a,
1849        // 0x10000, 0x20000,
1850        // 0xe0000, 0x10ffff
1851        0x41, 0xed, 0xa4, 0x80,
1852        0x61, 0xed, 0xb0, 0x80,
1853        0xf4, 0x90, 0x80, 0x80, 0x5a,
1854        0xf1, 0x90, 0x80, 0x80, 0x7a,
1855        0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1856        0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1857    };
1858    static const UChar expected_utf16[] = {
1859        0x41, 0xfffd,
1860        0x61, 0xfffd,
1861        0xfffd, 0x5a,
1862        0xd900, 0xdc00, 0x7a,
1863        0xd800, 0xdc00, 0xd840, 0xdc00,
1864        0xdb40, 0xdc00, 0xdbff, 0xdfff
1865    };
1866    UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1867    UnicodeString expected(FALSE, expected_utf16, LENGTHOF(expected_utf16));
1868
1869    if(from8 != expected) {
1870        errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1871    }
1872#if U_HAVE_STD_STRING
1873    U_STD_NSQ string utf8_string((const char *)utf8, sizeof(utf8));
1874    UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1875    if(from8b != expected) {
1876        errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1877    }
1878#endif
1879
1880    static const UChar utf16[] = {
1881        0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1882    };
1883    static const uint8_t expected_utf8[] = {
1884        0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1885        0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1886    };
1887    UnicodeString us(FALSE, utf16, LENGTHOF(utf16));
1888
1889    char buffer[64];
1890    TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1891    us.toUTF8(sink);
1892    if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1893        0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1894    ) {
1895        errln("UnicodeString::toUTF8() did not create the expected string.");
1896    }
1897    if(!sink.calledFlush) {
1898        errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1899    }
1900#if U_HAVE_STD_STRING
1901    // Initial contents for testing that toUTF8String() appends.
1902    U_STD_NSQ string result8 = "-->";
1903    U_STD_NSQ string expected8 = "-->" + U_STD_NSQ string((const char *)expected_utf8, sizeof(expected_utf8));
1904    // Use the return value just for testing.
1905    U_STD_NSQ string &result8r = us.toUTF8String(result8);
1906    if(result8r != expected8 || &result8r != &result8) {
1907        errln("UnicodeString::toUTF8String() did not create the expected string.");
1908    }
1909#endif
1910}
1911
// Test if this compiler supports Return Value Optimization of unnamed temporary objects.
//
// Wraps uchars in a UnicodeString using the (TRUE, text, length) constructor
// that TestReadOnlyAlias() refers to as the read-only-aliasing constructor,
// and returns it directly as an unnamed temporary. The caller compares
// getBuffer() of the result against uchars to find out whether the returned
// object still points at the caller's array.
// NOTE(review): the length -1 presumably asks the constructor to determine the
// length from the NUL terminator - confirm against the UnicodeString API docs.
static UnicodeString wrapUChars(const UChar *uchars) {
    return UnicodeString(TRUE, uchars, -1);
}
1916
// Checks read-only aliasing of UnicodeString: a string that was set up to
// point at a caller-owned UChar array is expected
//  - to keep pointing into that array after operations that merely shrink the
//    visible range (truncate, remove at either end, retainBetween),
//  - to never write into the caller's array, and
//  - to stop returning the caller's pointer from getTerminatedBuffer() when
//    the visible contents are no longer followed by a NUL in the caller's array.
// The second half checks tempSubString()/tempSubStringBetween().
// The checks are order-dependent because each one starts from the state left
// by the previous statements.
void
UnicodeStringTest::TestReadOnlyAlias() {
    UChar uchars[]={ 0x61, 0x62, 0 };
    // alias "ab"; uchars[2] is the NUL, so the array is already terminated at length 2
    UnicodeString alias(TRUE, uchars, 2);
    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
        errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
        return;
    }
    // shortening to "a": the plain buffer may stay aliased, but a terminated buffer
    // would need a NUL at uchars[1], which holds 0x62 and must not be overwritten
    alias.truncate(1);
    if(alias.length()!=1 || alias.getBuffer()!=uchars) {
        errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
    }
    if(alias.getTerminatedBuffer()==uchars) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "did not allocate and copy as expected.");
    }
    if(uchars[1]!=0x62) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "modified the original buffer.");
    }
    if(1!=u_strlen(alias.getTerminatedBuffer())) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "does not return a buffer terminated at the proper length.");
    }

    // same scenario via setTo(), this time emptying the string completely
    alias.setTo(TRUE, uchars, 2);
    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
        errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
        return;
    }
    alias.remove();
    if(alias.length()!=0) {
        errln("UnicodeString(read-only-alias).remove() did not work.");
    }
    if(alias.getTerminatedBuffer()==uchars) {
        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
              "did not un-alias as expected.");
    }
    if(uchars[0]!=0x61) {
        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
              "modified the original buffer.");
    }
    if(0!=u_strlen(alias.getTerminatedBuffer())) {
        errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
              "does not return a buffer terminated at length 0.");
    }

    // 36-character string; the alias is re-created before each of the three checks
    UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
    // removing a prefix: the alias is expected to start 10 UChars into longString's buffer
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.remove(0, 10);
    if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
        errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
    }
    // removing a suffix (the length 99 reaches past the end): same start, first 27 UChars kept
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.remove(27, 99);
    if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
        errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
    }
    // keeping only [6, 30): 24 UChars starting 6 UChars into longString's buffer
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.retainBetween(6, 30);
    if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
        errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
    }

    // hasRVO is TRUE when the string returned by value from wrapUChars() still
    // points at abc; the getBuffer() identity checks below are enforced only then.
    // NOTE(review): presumably a copied (non-elided) temporary no longer aliases
    // the original array - confirm against the UnicodeString copy constructor.
    UChar abc[]={ 0x61, 0x62, 0x63, 0 };
    UBool hasRVO= wrapUChars(abc).getBuffer()==abc;

    UnicodeString temp;
    // no arguments: the whole string
    temp.fastCopyFrom(longString.tempSubString());
    if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
        errln("UnicodeString.tempSubString() failed");
    }
    // a negative start is expected to behave like start 0
    temp.fastCopyFrom(longString.tempSubString(-3, 5));
    if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
        errln("UnicodeString.tempSubString(-3, 5) failed");
    }
    // start only: everything from index 17 to the end
    temp.fastCopyFrom(longString.tempSubString(17));
    if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
        errln("UnicodeString.tempSubString(17) failed");
    }
    // a start beyond the end (99 > 36) is expected to give an empty string
    temp.fastCopyFrom(longString.tempSubString(99));
    if(!temp.isEmpty()) {
        errln("UnicodeString.tempSubString(99) failed");
    }
    temp.fastCopyFrom(longString.tempSubStringBetween(6));
    if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
        errln("UnicodeString.tempSubStringBetween(6) failed");
    }
    // [8, 18): 10 UChars starting at index 8
    temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
    if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
        errln("UnicodeString.tempSubStringBetween(8, 18) failed");
    }
    // a substring of a bogus string is expected to be bogus as well
    UnicodeString bogusString;
    bogusString.setToBogus();
    temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
    if(!temp.isBogus()) {
        errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
    }
}
2016