1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2001-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8/********************************************************************************
9*
10* File custrtrn.C
11*
12* Modification History:
13*        Name                     Description
14*        Ram                      String transformations test
15*********************************************************************************
16*/
17/****************************************************************************/
18
19
20#include <stdlib.h>
21#include <stdio.h>
22#include <string.h>
23#include "unicode/utypes.h"
24#include "unicode/ustring.h"
25#include "unicode/ures.h"
26#include "ustr_imp.h"
27#include "cintltst.h"
28#include "cmemory.h"
29#include "cstring.h"
30#include "cwchar.h"
31
/*
 * Registration entry point of this file: adds the test functions declared
 * below to the test tree at root. See the definition for the paths used.
 */
void addUCharTransformTest(TestNode** root);

/*
 * Test functions implemented in this file. All take no arguments and return
 * nothing; addUCharTransformTest() registers them (Test_WCHART_LongString
 * only when file I/O and legacy conversion are both enabled).
 */
static void Test_strToUTF32(void);
static void Test_strToUTF32_surrogates(void);
static void Test_strFromUTF32(void);
static void Test_strFromUTF32_surrogates(void);
static void Test_UChar_UTF8_API(void);
static void Test_FromUTF8(void);
static void Test_FromUTF8Lenient(void);
static void Test_UChar_WCHART_API(void);
static void Test_widestrs(void);
static void Test_WCHART_LongString(void);
static void Test_strToJavaModifiedUTF8(void);
static void Test_strFromJavaModifiedUTF8(void);
static void TestNullEmptySource(void);
47
48void
49addUCharTransformTest(TestNode** root)
50{
51   addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32");
52   addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates");
53   addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32");
54   addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates");
55   addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
56   addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8");
57   addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient");
58   addTest(root, &Test_UChar_WCHART_API,  "custrtrn/Test_UChar_WCHART_API");
59   addTest(root, &Test_widestrs,  "custrtrn/Test_widestrs");
60#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
61   addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString");
62#endif
63   addTest(root, &Test_strToJavaModifiedUTF8,  "custrtrn/Test_strToJavaModifiedUTF8");
64   addTest(root, &Test_strFromJavaModifiedUTF8,  "custrtrn/Test_strFromJavaModifiedUTF8");
65   addTest(root, &TestNullEmptySource,  "custrtrn/TestNullEmptySource");
66}
67
/*
 * UTF-32 test text, terminated by a 0 element.
 *
 * It consists of BMP code points (mostly in rows of eight followed by
 * U+000D U+000A), one run of supplementary code points, and more BMP rows.
 * Test_strToUTF32() uses it as the expected result of converting src16;
 * Test_strFromUTF32() uses it as the conversion input.
 */
static const UChar32 src32[]={
    0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
    0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
    0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
    0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
    0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
    0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
    0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
    0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
    0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
    0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
    0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
    0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
    0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
    0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
    0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
    0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
    /* test non-BMP code points */
    0x0002A699,
    0x0002A69C, 0x0002A69D, 0x0002A69E, 0x0002A69F, 0x0002A6A0, 0x0002A6A5, 0x0002A6A6, 0x0002A6A7, 0x0002A6A8, 0x0002A6AB,
    0x0002A6AC, 0x0002A6AD, 0x0002A6AE, 0x0002A6AF, 0x0002A6B0, 0x0002A6B1, 0x0002A6B3, 0x0002A6B5, 0x0002A6B6, 0x0002A6B7,
    0x0002A6B8, 0x0002A6B9, 0x0002A6BA, 0x0002A6BB, 0x0002A6BC, 0x0002A6BD, 0x0002A6BE, 0x0002A6BF, 0x0002A6C0, 0x0002A6C1,
    0x0002A6C2, 0x0002A6C3, 0x0002A6C4, 0x0002A6C8, 0x0002A6CA, 0x0002A6CB, 0x0002A6CD, 0x0002A6CE, 0x0002A6CF, 0x0002A6D0,
    0x0002A6D1, 0x0002A6D2, 0x0002A6D3, 0x0002A6D4, 0x0002A6D5,

    0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
    0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
    0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
    0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
    0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
};
102
/*
 * UTF-16 counterpart of src32, terminated by a 0 element.
 *
 * The BMP rows hold the same values as in src32; the supplementary code
 * points appear as surrogate pairs (lead unit 0xD869 followed by a trail
 * unit). The tests in this file expect converting either fixture to yield
 * the other.
 */
static const UChar src16[] = {
    0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
    0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
    0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
    0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
    0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
    0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
    0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
    0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
    0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
    0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
    0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
    0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
    0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
    0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
    0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
    0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,

    /* test non-BMP code points */
    0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
    0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
    0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
    0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
    0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
    0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
    0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
    0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
    0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
    0xD869, 0xDED5,

    0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
    0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
    0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
    0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
    0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
};
142
143
144static void Test_strToUTF32(void){
145    UErrorCode err = U_ZERO_ERROR;
146    UChar32 u32Target[400];
147    int32_t u32DestLen;
148    int i= 0;
149
150    /* first with length */
151    u32DestLen = -2;
152    u_strToUTF32(u32Target, 0, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
153    if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
154        log_err("u_strToUTF32(preflight with length): "
155                "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
156                (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
157        return;
158    }
159    err = U_ZERO_ERROR;
160    u32DestLen = -2;
161    u_strToUTF32(u32Target, UPRV_LENGTHOF(src32)+1, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
162    if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
163        log_err("u_strToUTF32(with length): "
164                "length %ld != %ld and %s != U_ZERO_ERROR\n",
165                (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
166        return;
167    }
168    /*for(i=0; i< u32DestLen; i++){
169        printf("0x%08X, ",uTarget[i]);
170        if(i%10==0){
171            printf("\n");
172        }
173    }*/
174    for(i=0; i< UPRV_LENGTHOF(src32); i++){
175        if(u32Target[i] != src32[i]){
176            log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src32[i], u32Target[i],i);
177        }
178    }
179    if(u32Target[i] != 0){
180        log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0, u32Target[i],i);
181    }
182
183    /* now NUL-terminated */
184    u32DestLen = -2;
185    u_strToUTF32(NULL,0, &u32DestLen, src16, -1,&err);
186    if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
187        log_err("u_strToUTF32(preflight with NUL-termination): "
188                "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
189                (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
190        return;
191    }
192    err = U_ZERO_ERROR;
193    u32DestLen = -2;
194    u_strToUTF32(u32Target, UPRV_LENGTHOF(src32), &u32DestLen, src16, -1,&err);
195    if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
196        log_err("u_strToUTF32(with NUL-termination): "
197                "length %ld != %ld and %s != U_ZERO_ERROR\n",
198                (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
199        return;
200    }
201
202    for(i=0; i< UPRV_LENGTHOF(src32); i++){
203        if(u32Target[i] != src32[i]){
204            log_verbose("u_strToUTF32(NUL-termination) failed expected: %04X got: %04X \n", src32[i], u32Target[i]);
205        }
206    }
207}
208
209/* test unpaired surrogates */
210static void Test_strToUTF32_surrogates() {
211    UErrorCode err = U_ZERO_ERROR;
212    UChar32 u32Target[400];
213    int32_t len16, u32DestLen;
214    int32_t numSubstitutions;
215    int i;
216
217    static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
218    static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 };
219    static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 };
220    static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 };
221    len16 = UPRV_LENGTHOF(surr16);
222    for(i = 0; i < 4; ++i) {
223        err = U_ZERO_ERROR;
224        u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err);
225        if(err != U_INVALID_CHAR_FOUND) {
226            log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
227                    (long)i, u_errorName(err));
228            return;
229        }
230
231        err = U_ZERO_ERROR;
232        u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err);
233        if(err != U_INVALID_CHAR_FOUND) {
234            log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
235                    (long)i, u_errorName(err));
236            return;
237        }
238
239        err = U_ZERO_ERROR;
240        u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err);
241        if(err != U_INVALID_CHAR_FOUND) {
242            log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
243                    (long)i, u_errorName(err));
244            return;
245        }
246
247        err = U_ZERO_ERROR;
248        u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err);
249        if(err != U_INVALID_CHAR_FOUND) {
250            log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
251                    (long)i, u_errorName(err));
252            return;
253        }
254    }
255
256    err = U_ZERO_ERROR;
257    u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err);
258    if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
259        log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
260                u_errorName(err));
261        return;
262    }
263
264    err = U_ZERO_ERROR;
265    u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err);
266    if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
267        log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
268                u_errorName(err));
269        return;
270    }
271
272    err = U_ZERO_ERROR;
273    u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err);
274    if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
275        log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
276                u_errorName(err));
277        return;
278    }
279
280    err = U_ZERO_ERROR;
281    u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err);
282    if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
283        log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
284                u_errorName(err));
285        return;
286    }
287
288    /* with substitution character */
289    numSubstitutions = -1;
290    err = U_ZERO_ERROR;
291    u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
292    if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
293        log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
294                u_errorName(err));
295        return;
296    }
297
298    err = U_ZERO_ERROR;
299    u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
300    if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) {
301        log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
302                u_errorName(err));
303        return;
304    }
305
306    err = U_ZERO_ERROR;
307    u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
308    if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
309        log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
310                u_errorName(err));
311        return;
312    }
313
314    err = U_ZERO_ERROR;
315    u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
316    if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) {
317        log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
318                u_errorName(err));
319        return;
320    }
321}
322
323static void Test_strFromUTF32(void){
324    UErrorCode err = U_ZERO_ERROR;
325    UChar uTarget[400];
326    int32_t uDestLen;
327    int i= 0;
328
329    /* first with length */
330    uDestLen = -2;
331    u_strFromUTF32(uTarget,0,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
332    if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
333        log_err("u_strFromUTF32(preflight with length): "
334                "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
335                (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
336        return;
337    }
338    err = U_ZERO_ERROR;
339    uDestLen = -2;
340    u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16)+1,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
341    if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
342        log_err("u_strFromUTF32(with length): "
343                "length %ld != %ld and %s != U_ZERO_ERROR\n",
344                (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
345        return;
346    }
347    /*for(i=0; i< uDestLen; i++){
348        printf("0x%04X, ",uTarget[i]);
349        if(i%10==0){
350            printf("\n");
351        }
352    }*/
353
354    for(i=0; i< uDestLen; i++){
355        if(uTarget[i] != src16[i]){
356            log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src16[i] ,uTarget[i],i);
357        }
358    }
359    if(uTarget[i] != 0){
360        log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0,uTarget[i],i);
361    }
362
363    /* now NUL-terminated */
364    uDestLen = -2;
365    u_strFromUTF32(NULL,0,&uDestLen,src32,-1,&err);
366    if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
367        log_err("u_strFromUTF32(preflight with NUL-termination): "
368                "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
369                (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
370        return;
371    }
372    err = U_ZERO_ERROR;
373    uDestLen = -2;
374    u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16),&uDestLen,src32,-1,&err);
375    if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
376        log_err("u_strFromUTF32(with NUL-termination): "
377                "length %ld != %ld and %s != U_ZERO_ERROR\n",
378                (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
379        return;
380    }
381
382    for(i=0; i< uDestLen; i++){
383        if(uTarget[i] != src16[i]){
384            log_verbose("u_strFromUTF32(with NUL-termination) failed expected: %04X got: %04X \n", src16[i] ,uTarget[i]);
385        }
386    }
387}
388
389/* test surrogate code points */
390static void Test_strFromUTF32_surrogates() {
391    UErrorCode err = U_ZERO_ERROR;
392    UChar uTarget[400];
393    int32_t len32, uDestLen;
394    int32_t numSubstitutions;
395    int i;
396
397    static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 };
398    static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
399    static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
400    static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45,
401                                            0x5a, 0xd900, 0xdc00, 0x7a, 0 };
402    len32 = UPRV_LENGTHOF(surr32);
403    for(i = 0; i < 6; ++i) {
404        err = U_ZERO_ERROR;
405        u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err);
406        if(err != U_INVALID_CHAR_FOUND) {
407            log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
408                    (long)i, u_errorName(err));
409            return;
410        }
411
412        err = U_ZERO_ERROR;
413        u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err);
414        if(err != U_INVALID_CHAR_FOUND) {
415            log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
416                    (long)i, u_errorName(err));
417            return;
418        }
419
420        err = U_ZERO_ERROR;
421        u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err);
422        if(err != U_INVALID_CHAR_FOUND) {
423            log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
424                    (long)i, u_errorName(err));
425            return;
426        }
427
428        err = U_ZERO_ERROR;
429        u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err);
430        if(err != U_INVALID_CHAR_FOUND) {
431            log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
432                    (long)i, u_errorName(err));
433            return;
434        }
435    }
436
437    err = U_ZERO_ERROR;
438    u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err);
439    if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
440        log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
441                u_errorName(err));
442        return;
443    }
444
445    err = U_ZERO_ERROR;
446    u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err);
447    if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
448        log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
449                u_errorName(err));
450        return;
451    }
452
453    err = U_ZERO_ERROR;
454    u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err);
455    if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
456        log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
457                u_errorName(err));
458        return;
459    }
460
461    err = U_ZERO_ERROR;
462    u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err);
463    if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
464        log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
465                u_errorName(err));
466        return;
467    }
468
469    /* with substitution character */
470    numSubstitutions = -1;
471    err = U_ZERO_ERROR;
472    u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
473    if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) {
474        log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
475                u_errorName(err));
476        return;
477    }
478
479    err = U_ZERO_ERROR;
480    u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
481    if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) {
482        log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
483                u_errorName(err));
484        return;
485    }
486
487    err = U_ZERO_ERROR;
488    u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
489    if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) {
490        log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
491                u_errorName(err));
492        return;
493    }
494
495    err = U_ZERO_ERROR;
496    u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
497    if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) {
498        log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
499                u_errorName(err));
500        return;
501    }
502}
503
504static void Test_UChar_UTF8_API(void){
505
506    UErrorCode err = U_ZERO_ERROR;
507    UChar uTemp[1];
508    char u8Temp[1];
509    UChar* uTarget=uTemp;
510    const char* u8Src;
511    int32_t u8SrcLen = 0;
512    int32_t uTargetLength = 0;
513    int32_t uDestLen=0;
514    const UChar* uSrc = src16;
515    int32_t uSrcLen   = sizeof(src16)/2;
516    char* u8Target = u8Temp;
517    int32_t u8TargetLength =0;
518    int32_t u8DestLen =0;
519    UBool failed = FALSE;
520    int i= 0;
521    int32_t numSubstitutions;
522
523    {
524        /* preflight */
525        u8Temp[0] = 0x12;
526        u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
527        if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){
528            err = U_ZERO_ERROR;
529            u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
530            u8TargetLength = u8DestLen;
531
532            u8Target[u8TargetLength] = (char)0xfe;
533            u8DestLen = -1;
534            u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
535            if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){
536                log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err));
537                return;
538            }
539
540        }
541        else {
542            log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
543        }
544        failed = FALSE;
545        /*for(i=0; i< u8DestLen; i++){
546            printf("0x%04X, ",u8Target[i]);
547            if(i%10==0){
548                printf("\n");
549            }
550        }*/
551        /*for(i=0; i< u8DestLen; i++){
552            if(u8Target[i] != src8[i]){
553                log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
554                failed =TRUE;
555            }
556        }
557        if(failed){
558            log_err("u_strToUTF8() failed \n");
559        }*/
560        u8Src = u8Target;
561        u8SrcLen = u8DestLen;
562
563        /* preflight */
564        uTemp[0] = 0x1234;
565        u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
566        if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){
567            err = U_ZERO_ERROR;
568            uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
569            uTargetLength =  uDestLen;
570
571            uTarget[uTargetLength] = 0xfff0;
572            uDestLen = -1;
573            u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
574        }
575        else {
576            log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n");
577        }
578        /*for(i=0; i< uDestLen; i++){
579            printf("0x%04X, ",uTarget[i]);
580            if(i%10==0){
581                printf("\n");
582            }
583        }*/
584
585        if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) {
586            failed = TRUE;
587        }
588        for(i=0; i< uSrcLen; i++){
589            if(uTarget[i] != src16[i]){
590                log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
591                failed =TRUE;
592            }
593        }
594        if(failed){
595            log_err("error: u_strFromUTF8(after preflighting) failed\n");
596        }
597
598        free(u8Target);
599        free(uTarget);
600    }
601    {
602        u8SrcLen = -1;
603        uTargetLength = 0;
604        uSrcLen =-1;
605        u8TargetLength=0;
606        failed = FALSE;
607        /* preflight */
608        u_strToUTF8(NULL,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
609        if(err == U_BUFFER_OVERFLOW_ERROR){
610            err = U_ZERO_ERROR;
611            u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
612            u8TargetLength = u8DestLen;
613
614            u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
615
616        }
617        else {
618            log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
619        }
620        failed = FALSE;
621        /*for(i=0; i< u8DestLen; i++){
622            printf("0x%04X, ",u8Target[i]);
623            if(i%10==0){
624                printf("\n");
625            }
626        }*/
627        /*for(i=0; i< u8DestLen; i++){
628            if(u8Target[i] != src8[i]){
629                log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
630                failed =TRUE;
631            }
632        }
633        if(failed){
634            log_err("u_strToUTF8() failed \n");
635        }*/
636        u8Src = u8Target;
637        u8SrcLen = u8DestLen;
638
639        /* preflight */
640        u_strFromUTF8(NULL,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
641        if(err == U_BUFFER_OVERFLOW_ERROR){
642            err = U_ZERO_ERROR;
643            uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
644            uTargetLength =  uDestLen;
645
646            u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
647        }
648        else {
649            log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
650        }
651        /*for(i=0; i< uDestLen; i++){
652            printf("0x%04X, ",uTarget[i]);
653            if(i%10==0){
654                printf("\n");
655            }
656        }*/
657
658        for(i=0; i< uSrcLen; i++){
659            if(uTarget[i] != src16[i]){
660                log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
661                failed =TRUE;
662            }
663        }
664        if(failed){
665            log_err("u_strToUTF8() failed \n");
666        }
667
668        free(u8Target);
669        free(uTarget);
670    }
671
672    /* test UTF-8 with single surrogates - illegal in Unicode 3.2 */
673    // Since ICU 60, each surrogate byte sequence is treated as 3 single-byte errors.
674    {
675        static const UChar
676            withLead16[]={ 0x1800, 0xd89a, 0x0061 },
677            withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 },
678            withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0xfffd, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */
679            withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0xd900, 0xdc05, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */
680        static const uint8_t
681            withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 },
682            withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 },
683            withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */
684            withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */
685        UChar out16[10];
686        char out8[10];
687
688        if(
689            (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withLead16, UPRV_LENGTHOF(withLead16), &err), err!=U_INVALID_CHAR_FOUND) ||
690            (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withTrail16, -1, &err), err!=U_INVALID_CHAR_FOUND) ||
691            (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withLead8, UPRV_LENGTHOF(withLead8), &err), err!=U_INVALID_CHAR_FOUND) ||
692            (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withTrail8, -1, &err), err!=U_INVALID_CHAR_FOUND)
693        ) {
694            log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n");
695        }
696
697        /* test error handling with substitution characters */
698
699        /* from UTF-8 with length */
700        err=U_ZERO_ERROR;
701        numSubstitutions=-1;
702        out16[0]=0x55aa;
703        uDestLen=0;
704        u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
705                             (const char *)withTrail8, uprv_strlen((const char *)withTrail8),
706                             0x50005, &numSubstitutions,
707                             &err);
708        if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) ||
709                             0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) ||
710                             numSubstitutions!=3) {
711            log_err("error: u_strFromUTF8WithSub(length) failed\n");
712        }
713
714        /* from UTF-8 with NUL termination */
715        err=U_ZERO_ERROR;
716        numSubstitutions=-1;
717        out16[0]=0x55aa;
718        uDestLen=0;
719        u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
720                             (const char *)withTrail8, -1,
721                             0xfffd, &numSubstitutions,
722                             &err);
723        if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) ||
724                             0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) ||
725                             numSubstitutions!=3) {
726            log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n");
727        }
728
729        /* preflight from UTF-8 with NUL termination */
730        err=U_ZERO_ERROR;
731        numSubstitutions=-1;
732        out16[0]=0x55aa;
733        uDestLen=0;
734        u_strFromUTF8WithSub(out16, 1, &uDestLen,
735                             (const char *)withTrail8, -1,
736                             0x50005, &numSubstitutions,
737                             &err);
738        if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=3) {
739            log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n");
740        }
741
742        /* to UTF-8 with length */
743        err=U_ZERO_ERROR;
744        numSubstitutions=-1;
745        out8[0]=(char)0xf5;
746        u8DestLen=0;
747        u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
748                           withTrail16, u_strlen(withTrail16),
749                           0xfffd, &numSubstitutions,
750                           &err);
751        if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) ||
752                             0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) ||
753                             numSubstitutions!=1) {
754            log_err("error: u_strToUTF8WithSub(length) failed\n");
755        }
756
757        /* to UTF-8 with NUL termination */
758        err=U_ZERO_ERROR;
759        numSubstitutions=-1;
760        out8[0]=(char)0xf5;
761        u8DestLen=0;
762        u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
763                           withTrail16, -1,
764                           0x1a, &numSubstitutions,
765                           &err);
766        if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8Sub1A) ||
767                             0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) ||
768                             numSubstitutions!=1) {
769            log_err("error: u_strToUTF8WithSub(NUL termination) failed\n");
770        }
771
772        /* preflight to UTF-8 with NUL termination */
773        err=U_ZERO_ERROR;
774        numSubstitutions=-1;
775        out8[0]=(char)0xf5;
776        u8DestLen=0;
777        u_strToUTF8WithSub(out8, 1, &u8DestLen,
778                           withTrail16, -1,
779                           0xfffd, &numSubstitutions,
780                           &err);
781        if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) ||
782                                           numSubstitutions!=1) {
783            log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n");
784        }
785
786        /* test that numSubstitutions==0 if there are no substitutions */
787
788        /* from UTF-8 with length (just first 3 bytes which are valid) */
789        err=U_ZERO_ERROR;
790        numSubstitutions=-1;
791        out16[0]=0x55aa;
792        uDestLen=0;
793        u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
794                             (const char *)withTrail8, 3,
795                             0x50005, &numSubstitutions,
796                             &err);
797        if(U_FAILURE(err) || uDestLen!=1 ||
798                             0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
799                             numSubstitutions!=0) {
800            log_err("error: u_strFromUTF8WithSub(no subs) failed\n");
801        }
802
803        /* to UTF-8 with length (just first UChar which is valid) */
804        err=U_ZERO_ERROR;
805        numSubstitutions=-1;
806        out8[0]=(char)0xf5;
807        u8DestLen=0;
808        u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
809                           withTrail16, 1,
810                           0xfffd, &numSubstitutions,
811                           &err);
812        if(U_FAILURE(err) || u8DestLen!=3 ||
813                             0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
814                             numSubstitutions!=0) {
815            log_err("error: u_strToUTF8WithSub(no subs) failed\n");
816        }
817
818        /* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */
819
820        /* from UTF-8 with length (just first 3 bytes which are valid) */
821        err=U_ZERO_ERROR;
822        numSubstitutions=-1;
823        out16[0]=0x55aa;
824        uDestLen=0;
825        u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
826                             (const char *)withTrail8, 3,
827                             U_SENTINEL, &numSubstitutions,
828                             &err);
829        if(U_FAILURE(err) || uDestLen!=1 ||
830                             0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
831                             numSubstitutions!=0) {
832            log_err("error: u_strFromUTF8WithSub(no subchar) failed\n");
833        }
834
835        /* to UTF-8 with length (just first UChar which is valid) */
836        err=U_ZERO_ERROR;
837        numSubstitutions=-1;
838        out8[0]=(char)0xf5;
839        u8DestLen=0;
840        u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
841                           withTrail16, 1,
842                           U_SENTINEL, &numSubstitutions,
843                           &err);
844        if(U_FAILURE(err) || u8DestLen!=3 ||
845                             0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
846                             numSubstitutions!=0) {
847            log_err("error: u_strToUTF8WithSub(no subchar) failed\n");
848        }
849    }
850    {
851        /*
852         * Test with an illegal lead byte that would be followed by more than 3 trail bytes.
853         * See ticket #10371.
854         */
855        static const char src[1]={ (char)0xf8 };
856        UChar out16[10];
857        err=U_ZERO_ERROR;
858        u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, src, 1, &err);
859        if(err!=U_INVALID_CHAR_FOUND) {
860            log_err("error: u_strFromUTF8(5-byte lead byte) failed\n");
861        }
862    }
863}
864
865/* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */
866static UBool
867equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) {
868    UChar c1, c2;
869
870    while(length>0) {
871        c1=*s++;
872        c2=*t++;
873        if(c1!=c2 && c2!=0xfffd) {
874            return FALSE;
875        }
876        --length;
877    }
878    return TRUE;
879}
880
881/* test u_strFromUTF8Lenient() */
882static void
883Test_FromUTF8(void) {
884    /*
885     * Test case from icu-support list 20071130 "u_strFromUTF8() returns U_INVALID_CHAR_FOUND(10)"
886     */
887    static const uint8_t bytes[]={ 0xe0, 0xa5, 0x9c, 0 };
888    UChar dest[64];
889    UChar *destPointer;
890    int32_t destLength;
891    UErrorCode errorCode;
892
893    /* 3 bytes input, one UChar output (U+095C) */
894    errorCode=U_ZERO_ERROR;
895    destLength=-99;
896    destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 3, &errorCode);
897    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
898        log_err("error: u_strFromUTF8(preflight srcLength=3) fails: destLength=%ld - %s\n",
899                (long)destLength, u_errorName(errorCode));
900    }
901
902    /* 4 bytes input, two UChars output (U+095C U+0000) */
903    errorCode=U_ZERO_ERROR;
904    destLength=-99;
905    destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 4, &errorCode);
906    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=2) {
907        log_err("error: u_strFromUTF8(preflight srcLength=4) fails: destLength=%ld - %s\n",
908                (long)destLength, u_errorName(errorCode));
909    }
910
911    /* NUL-terminated 3 bytes input, one UChar output (U+095C) */
912    errorCode=U_ZERO_ERROR;
913    destLength=-99;
914    destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, -1, &errorCode);
915    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
916        log_err("error: u_strFromUTF8(preflight srcLength=-1) fails: destLength=%ld - %s\n",
917                (long)destLength, u_errorName(errorCode));
918    }
919
920    /* 3 bytes input, one UChar output (U+095C), transform not just preflight */
921    errorCode=U_ZERO_ERROR;
922    dest[0]=dest[1]=99;
923    destLength=-99;
924    destPointer=u_strFromUTF8(dest, UPRV_LENGTHOF(dest), &destLength, (const char *)bytes, 3, &errorCode);
925    if(U_FAILURE(errorCode) || destPointer!=dest || destLength!=1 || dest[0]!=0x95c || dest[1]!=0) {
926        log_err("error: u_strFromUTF8(transform srcLength=3) fails: destLength=%ld - %s\n",
927                (long)destLength, u_errorName(errorCode));
928    }
929}
930
931/* test u_strFromUTF8Lenient() */
932static void
933Test_FromUTF8Lenient(void) {
934    /*
935     * Multiple input strings, each NUL-terminated.
936     * Terminate with a string starting with 0xff.
937     */
938    static const uint8_t bytes[]={
939        /* well-formed UTF-8 */
940        0x61,  0xc3, 0x9f,  0xe0, 0xa0, 0x80,  0xf0, 0xa0, 0x80, 0x80,
941        0x62,  0xc3, 0xa0,  0xe0, 0xa0, 0x81,  0xf0, 0xa0, 0x80, 0x81, 0,
942
943        /* various malformed sequences */
944        0xc3, 0xc3, 0x9f,  0xc3, 0xa0,  0xe0, 0x80, 0x8a,  0xf0, 0x41, 0x42, 0x43, 0,
945
946        /* truncated input */
947        0xc3, 0,
948        0xe0, 0,
949        0xe0, 0xa0, 0,
950        0xf0, 0,
951        0xf0, 0x90, 0,
952        0xf0, 0x90, 0x80, 0,
953
954        /* non-ASCII characters in the last few bytes */
955        0x61,  0xc3, 0x9f,  0xe0, 0xa0, 0x80, 0,
956        0x61,  0xe0, 0xa0, 0x80,  0xc3, 0x9f, 0,
957
958        /* empty string */
959        0,
960
961        /* finish */
962        0xff, 0
963    };
964
965    /* Multiple output strings, each NUL-terminated. 0xfffd matches anything. */
966    static const UChar uchars[]={
967        0x61, 0xdf, 0x800,  0xd840, 0xdc00,
968        0x62, 0xe0, 0x801,  0xd840, 0xdc01,  0,
969
970        0xfffd, 0x9f, 0xe0, 0xa,  0xfffd, 0xfffd,  0,
971
972        0xfffd, 0,
973        0xfffd, 0,
974        0xfffd, 0,
975        0xfffd, 0,
976        0xfffd, 0,
977        0xfffd, 0,
978
979        0x61, 0xdf, 0x800,  0,
980        0x61, 0x800, 0xdf,  0,
981
982        0,
983
984        0
985    };
986
987    UChar dest[64];
988    const char *pb;
989    const UChar *pu, *pDest;
990    int32_t srcLength, destLength0, destLength;
991    int number;
992    UErrorCode errorCode;
993
994    /* verify checking for some illegal arguments */
995    dest[0]=0x1234;
996    destLength=-1;
997    errorCode=U_ZERO_ERROR;
998    pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode);
999    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) {
1000        log_err("u_strFromUTF8Lenient(src=NULL) failed\n");
1001    }
1002
1003    dest[0]=0x1234;
1004    destLength=-1;
1005    errorCode=U_ZERO_ERROR;
1006    pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode);
1007    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1008        log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n");
1009    }
1010
1011    dest[0]=0x1234;
1012    destLength=-1;
1013    errorCode=U_MEMORY_ALLOCATION_ERROR;
1014    pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode);
1015    if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) {
1016        log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n");
1017    }
1018
1019    /* test normal behavior */
1020    number=0; /* string number for log_err() */
1021
1022    for(pb=(const char *)bytes, pu=uchars;
1023        *pb!=(char)0xff;
1024        pb+=srcLength+1, pu+=destLength0+1, ++number
1025    ) {
1026        srcLength=uprv_strlen(pb);
1027        destLength0=u_strlen(pu);
1028
1029        /* preflighting with NUL-termination */
1030        dest[0]=0x1234;
1031        destLength=-1;
1032        errorCode=U_ZERO_ERROR;
1033        pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode);
1034        if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1035            pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0
1036        ) {
1037            log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number);
1038        }
1039
1040        /* preflighting/some capacity with NUL-termination */
1041        if(srcLength>0) {
1042            dest[destLength0-1]=0x1234;
1043            destLength=-1;
1044            errorCode=U_ZERO_ERROR;
1045            pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode);
1046            if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1047                dest[destLength0-1]!=0x1234 || destLength!=destLength0
1048            ) {
1049                log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number);
1050            }
1051        }
1052
1053        /* conversion with NUL-termination, much capacity */
1054        dest[0]=dest[destLength0]=0x1234;
1055        destLength=-1;
1056        errorCode=U_ZERO_ERROR;
1057        pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, -1, &errorCode);
1058        if (errorCode!=U_ZERO_ERROR ||
1059            pDest!=dest || dest[destLength0]!=0 ||
1060            destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1061        ) {
1062            log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number);
1063        }
1064
1065        /* conversion with NUL-termination, exact capacity */
1066        dest[0]=dest[destLength0]=0x1234;
1067        destLength=-1;
1068        errorCode=U_ZERO_ERROR;
1069        pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode);
1070        if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1071            pDest!=dest || dest[destLength0]!=0x1234 ||
1072            destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1073        ) {
1074            log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number);
1075        }
1076
1077        /* preflighting with length */
1078        dest[0]=0x1234;
1079        destLength=-1;
1080        errorCode=U_ZERO_ERROR;
1081        pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode);
1082        if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1083            pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength
1084        ) {
1085            log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number);
1086        }
1087
1088        /* preflighting/some capacity with length */
1089        if(srcLength>0) {
1090            dest[srcLength-1]=0x1234;
1091            destLength=-1;
1092            errorCode=U_ZERO_ERROR;
1093            pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode);
1094            if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1095                dest[srcLength-1]!=0x1234 || destLength!=srcLength
1096            ) {
1097                log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number);
1098            }
1099        }
1100
1101        /* conversion with length, much capacity */
1102        dest[0]=dest[destLength0]=0x1234;
1103        destLength=-1;
1104        errorCode=U_ZERO_ERROR;
1105        pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, srcLength, &errorCode);
1106        if (errorCode!=U_ZERO_ERROR ||
1107            pDest!=dest || dest[destLength0]!=0 ||
1108            destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1109        ) {
1110            log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number);
1111        }
1112
1113        /* conversion with length, srcLength capacity */
1114        dest[0]=dest[srcLength]=dest[destLength0]=0x1234;
1115        destLength=-1;
1116        errorCode=U_ZERO_ERROR;
1117        pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode);
1118        if(srcLength==destLength0) {
1119            if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1120                pDest!=dest || dest[destLength0]!=0x1234 ||
1121                destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1122            ) {
1123                log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number);
1124            }
1125        } else {
1126            if (errorCode!=U_ZERO_ERROR ||
1127                pDest!=dest || dest[destLength0]!=0 ||
1128                destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1129            ) {
1130                log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number);
1131            }
1132        }
1133    }
1134}
1135
/*
 * ASCII-only, NUL-terminated UTF-16 test string:
 * "CDEFGHIJ", "KLMNOPQR", "STUVWXYZ", "STUVWXYZ", each followed by CR LF.
 * 40 code units plus the terminating NUL.
 */
static const uint16_t src16j[] = {
    0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a,  0x0d, 0x0a,
    0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52,  0x0d, 0x0a,
    0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,  0x0d, 0x0a,
    0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,  0x0d, 0x0a,
    0
};
/*
 * ASCII-only UTF-16 test data with embedded NULs: eight groups of five code
 * units, each group followed by U+0000 (48 code units in total).
 * Only ASCII is tested; the Latin-1 values (0x00A8..0x00E9, 0x0054, 0x0000)
 * that used to follow are disabled and not part of the table.
 */
static const uint16_t src16WithNulls[] = {
    0x43, 0x44, 0x45, 0x46, 0x47,  0,
    0x48, 0x49, 0x4a, 0x0d, 0x0a,  0,
    0x4b, 0x4c, 0x4d, 0x4e, 0x4f,  0,
    0x50, 0x51, 0x52, 0x0d, 0x0a,  0,
    0x53, 0x54, 0x55, 0x56, 0x57,  0,
    0x58, 0x59, 0x5a, 0x0d, 0x0a,  0,
    0x53, 0x54, 0x55, 0x56, 0x57,  0,
    0x58, 0x59, 0x5a, 0x0d, 0x0a,  0
};
1166static void Test_UChar_WCHART_API(void){
1167#if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1168    UErrorCode err = U_ZERO_ERROR;
1169    const UChar* uSrc = src16j;
1170    int32_t uSrcLen = sizeof(src16j)/2;
1171    wchar_t* wDest = NULL;
1172    int32_t wDestLen = 0;
1173    int32_t reqLen= 0 ;
1174    UBool failed = FALSE;
1175    UChar* uDest = NULL;
1176    int32_t uDestLen = 0;
1177    int i =0;
1178    {
1179        /* Bad UErrorCode arguments. Make sure that the API doesn't crash, and that Purify doesn't complain. */
1180        if (u_strFromWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1181            log_err("u_strFromWCS() should return NULL with a bad argument\n");
1182        }
1183        if (u_strToWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1184            log_err("u_strToWCS() should return NULL with a bad argument\n");
1185        }
1186
1187        /* NULL source & destination. */
1188        err = U_ZERO_ERROR;
1189        u_strFromWCS(NULL,0,NULL,NULL,0,&err);
1190        if (err != U_STRING_NOT_TERMINATED_WARNING) {
1191            log_err("u_strFromWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1192        }
1193        err = U_ZERO_ERROR;
1194        u_strToWCS(NULL,0,NULL,NULL,0,&err);
1195        if (err != U_STRING_NOT_TERMINATED_WARNING) {
1196            log_err("u_strToWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1197        }
1198        err = U_ZERO_ERROR;
1199
1200        /* pre-flight*/
1201        u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1202
1203        if(err == U_BUFFER_OVERFLOW_ERROR){
1204            err=U_ZERO_ERROR;
1205            wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1206            wDestLen = reqLen+1;
1207            u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1208        }
1209
1210        /* pre-flight */
1211        u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1212
1213
1214        if(err == U_BUFFER_OVERFLOW_ERROR){
1215            err =U_ZERO_ERROR;
1216            uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1217            uDestLen = reqLen + 1;
1218            u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1219        }else if(U_FAILURE(err)){
1220
1221            log_err("u_strFromWCS() failed. Error: %s \n", u_errorName(err));
1222            return;
1223        }
1224
1225        for(i=0; i< uSrcLen; i++){
1226            if(uDest[i] != src16j[i]){
1227                log_verbose("u_str*WCS() failed for unterminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1228                failed =TRUE;
1229            }
1230        }
1231
1232        if(U_FAILURE(err)){
1233            failed = TRUE;
1234        }
1235        if(failed){
1236            log_err("u_strToWCS() failed \n");
1237        }
1238        free(wDest);
1239        free(uDest);
1240
1241
1242        /* test with embeded nulls */
1243        uSrc = src16WithNulls;
1244        uSrcLen = sizeof(src16WithNulls)/2;
1245        wDestLen =0;
1246        uDestLen =0;
1247        wDest = NULL;
1248        uDest = NULL;
1249        /* pre-flight*/
1250        u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1251
1252        if(err == U_BUFFER_OVERFLOW_ERROR){
1253            err=U_ZERO_ERROR;
1254            wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1255            wDestLen = reqLen+1;
1256            u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1257        }
1258
1259        /* pre-flight */
1260        u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1261
1262        if(err == U_BUFFER_OVERFLOW_ERROR){
1263            err =U_ZERO_ERROR;
1264            uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1265            uDestLen = reqLen + 1;
1266            u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1267        }
1268
1269        if(!U_FAILURE(err)) {
1270         for(i=0; i< uSrcLen; i++){
1271            if(uDest[i] != src16WithNulls[i]){
1272                log_verbose("u_str*WCS() failed for string with nulls expected: \\u%04X got: \\u%04X at index: %i \n", src16WithNulls[i] ,uDest[i],i);
1273                failed =TRUE;
1274            }
1275         }
1276        }
1277
1278        if(U_FAILURE(err)){
1279            failed = TRUE;
1280        }
1281        if(failed){
1282            log_err("u_strToWCS() failed \n");
1283        }
1284        free(wDest);
1285        free(uDest);
1286
1287    }
1288
1289    {
1290
1291        uSrc = src16j;
1292        uSrcLen = sizeof(src16j)/2;
1293        wDestLen =0;
1294        uDestLen =0;
1295        wDest = NULL;
1296        uDest = NULL;
1297        wDestLen = 0;
1298        /* pre-flight*/
1299        u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1300
1301        if(err == U_BUFFER_OVERFLOW_ERROR){
1302            err=U_ZERO_ERROR;
1303            wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1304            wDestLen = reqLen+1;
1305            u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1306        }
1307        uDestLen = 0;
1308        /* pre-flight */
1309        u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1310
1311        if(err == U_BUFFER_OVERFLOW_ERROR){
1312            err =U_ZERO_ERROR;
1313            uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1314            uDestLen = reqLen + 1;
1315            u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1316        }
1317
1318
1319        if(!U_FAILURE(err)) {
1320         for(i=0; i< uSrcLen; i++){
1321            if(uDest[i] != src16j[i]){
1322                log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1323                failed =TRUE;
1324            }
1325         }
1326        }
1327
1328        if(U_FAILURE(err)){
1329            failed = TRUE;
1330        }
1331        if(failed){
1332            log_err("u_strToWCS() failed \n");
1333        }
1334        free(wDest);
1335        free(uDest);
1336    }
1337
1338    /*
1339     * Test u_terminateWChars().
1340     * All u_terminateXYZ() use the same implementation macro;
1341     * we test this function to improve API coverage.
1342     */
1343    {
1344        wchar_t buffer[10];
1345
1346        err=U_ZERO_ERROR;
1347        buffer[3]=0x20ac;
1348        wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1349        if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1350            log_err("u_terminateWChars(buffer, all, 3, zero) failed: %s length %d [3]==U+%04x\n",
1351                    u_errorName(err), wDestLen, buffer[3]);
1352        }
1353
1354        err=U_ZERO_ERROR;
1355        buffer[3]=0x20ac;
1356        wDestLen=u_terminateWChars(buffer, 3, 3, &err);
1357        if(err!=U_STRING_NOT_TERMINATED_WARNING || wDestLen!=3 || buffer[3]!=0x20ac) {
1358            log_err("u_terminateWChars(buffer, 3, 3, zero) failed: %s length %d [3]==U+%04x\n",
1359                    u_errorName(err), wDestLen, buffer[3]);
1360        }
1361
1362        err=U_STRING_NOT_TERMINATED_WARNING;
1363        buffer[3]=0x20ac;
1364        wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1365        if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1366            log_err("u_terminateWChars(buffer, all, 3, not-terminated) failed: %s length %d [3]==U+%04x\n",
1367                    u_errorName(err), wDestLen, buffer[3]);
1368        }
1369
1370        err=U_ZERO_ERROR;
1371        buffer[3]=0x20ac;
1372        wDestLen=u_terminateWChars(buffer, 2, 3, &err);
1373        if(err!=U_BUFFER_OVERFLOW_ERROR || wDestLen!=3 || buffer[3]!=0x20ac) {
1374            log_err("u_terminateWChars(buffer, 2, 3, zero) failed: %s length %d [3]==U+%04x\n",
1375                    u_errorName(err), wDestLen, buffer[3]);
1376        }
1377    }
1378#else
1379    log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1380#endif
1381}
1382
1383static void Test_widestrs()
1384{
1385#if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1386        wchar_t ws[100];
1387        UChar rts[100];
1388        int32_t wcap = UPRV_LENGTHOF(ws);
1389        int32_t wl;
1390        int32_t rtcap = UPRV_LENGTHOF(rts);
1391        int32_t rtl;
1392        wchar_t *wcs;
1393        UChar *cp;
1394        const char *errname;
1395        UChar ustr[] = {'h', 'e', 'l', 'l', 'o', 0};
1396        int32_t ul = UPRV_LENGTHOF(ustr) -1;
1397        char astr[100];
1398
1399        UErrorCode err;
1400
1401        err = U_ZERO_ERROR;
1402        wcs = u_strToWCS(ws, wcap, &wl, ustr, ul, &err);
1403        if (U_FAILURE(err)) {
1404                errname = u_errorName(err);
1405                log_err("test_widestrs: u_strToWCS error: %s!\n",errname);
1406        }
1407        if(ul!=wl){
1408            log_err("u_strToWCS: ustr = %s, ul = %d, ws = %S, wl = %d!\n", u_austrcpy(astr, ustr), ul, ws, wl);
1409        }
1410        err = U_ZERO_ERROR;
1411        wl = (int32_t)uprv_wcslen(wcs);
1412        cp = u_strFromWCS(rts, rtcap, &rtl, wcs, wl, &err);
1413        (void)cp;    /* Suppress set but not used warning. */
1414        if (U_FAILURE(err)) {
1415                errname = u_errorName(err);
1416                fprintf(stderr, "test_widestrs: ucnv_wcstombs error: %s!\n",errname);
1417        }
1418        if(wl != rtl){
1419            log_err("u_strFromWCS: wcs = %S, wl = %d,rts = %s, rtl = %d!\n", wcs, wl, u_austrcpy(astr, rts), rtl);
1420        }
1421#else
1422    log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1423#endif
1424}
1425
1426static void
1427Test_WCHART_LongString(){
1428#if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1429    UErrorCode status = U_ZERO_ERROR;
1430    const char* testdatapath=loadTestData(&status);
1431    UResourceBundle *theBundle = ures_open(testdatapath, "testtypes", &status);
1432    int32_t strLen =0;
1433    const UChar* str = ures_getStringByKey(theBundle, "testinclude",&strLen,&status);
1434    const UChar* uSrc = str;
1435    int32_t uSrcLen = strLen;
1436    int32_t wDestLen =0, reqLen=0, i=0;
1437    int32_t uDestLen =0;
1438    wchar_t* wDest = NULL;
1439    UChar* uDest = NULL;
1440    UBool failed = FALSE;
1441
1442    log_verbose("Loaded string of %d UChars\n", uSrcLen);
1443
1444    if(U_FAILURE(status)){
1445        log_data_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status));
1446        return;
1447    }
1448
1449    /* pre-flight*/
1450    u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1451
1452    if(status == U_BUFFER_OVERFLOW_ERROR){
1453        status=U_ZERO_ERROR;
1454        wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1455        wDestLen = reqLen+1;
1456        u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1457        log_verbose("To %d*%d-byte wchar_ts\n", reqLen,sizeof(wchar_t));
1458    }
1459
1460    {
1461      int j;
1462      for(j=0;j>=0&&j<reqLen;j++) {
1463        if(wDest[j]!=uSrc[j]) {
1464          log_verbose("Diff %04X vs %04X @ %d\n", wDest[j],uSrc[j],j);
1465          break;
1466        }
1467      }
1468    }
1469
1470    uDestLen = 0;
1471    /* pre-flight */
1472    u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1473    if(status == U_BUFFER_OVERFLOW_ERROR){
1474        status =U_ZERO_ERROR;
1475        uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1476        u_memset(uDest,0xFFFF,reqLen+1);
1477        uDestLen = reqLen + 1;
1478        u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1479        log_verbose("Back to %d UChars\n", reqLen);
1480    }
1481#if defined(U_WCHAR_IS_UTF16)
1482    log_verbose("U_WCHAR_IS_UTF16\n");
1483#elif defined(U_WCHAR_IS_UTF32)
1484    log_verbose("U_WCHAR_IS_UTF32\n");
1485#else
1486    log_verbose("U_WCHAR_IS_idunno (not UTF)\n");
1487#endif
1488
1489    if(reqLen!=uSrcLen) {
1490        log_err("Error: dest len is %d but expected src len %d\n", reqLen, uSrcLen);
1491    }
1492
1493    for(i=0; i< uSrcLen; i++){
1494        if(uDest[i] != str[i]){
1495            log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", str[i], uDest[i],i);
1496            failed =TRUE;
1497        }
1498    }
1499
1500    if(U_FAILURE(status)){
1501        failed = TRUE;
1502    }
1503    if(failed){
1504        log_err("u_strToWCS() failed \n");
1505    }
1506    free(wDest);
1507    free(uDest);
1508    /* close the bundle */
1509    ures_close(theBundle);
1510#else
1511    log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1512#endif
1513}
1514
1515static void Test_strToJavaModifiedUTF8() {
1516    static const UChar src[]={
1517        0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1518        0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1519        0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1520        0xdbff, 0xdfff,
1521        0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xed, 0xe0e, 0x6f
1522    };
1523    static const uint8_t expected[]={
1524        0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1525        0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1526        0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1527        0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xc0, 0x80,
1528        0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1529        0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xc3, 0xad, 0xe0, 0xb8, 0x8e, 0x6f
1530    };
1531    static const UChar shortSrc[]={
1532        0xe01, 0xe1, 0x61
1533    };
1534    static const uint8_t shortExpected[]={
1535        0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1536    };
1537    static const UChar asciiNul[]={
1538        0x61, 0x62, 0x63, 0
1539    };
1540    static const uint8_t asciiNulExpected[]={
1541        0x61, 0x62, 0x63
1542    };
1543    char dest[200];
1544    char *p;
1545    int32_t length, expectedTerminatedLength;
1546    UErrorCode errorCode;
1547
1548    expectedTerminatedLength=(int32_t)(strstr((const char *)expected, "\xc0\x80")-
1549                                       (const char *)expected);
1550
1551    errorCode=U_ZERO_ERROR;
1552    length=-5;
1553    p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1554                              src, UPRV_LENGTHOF(src), &errorCode);
1555    if( U_FAILURE(errorCode) || p!=dest ||
1556        length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1557        dest[length]!=0
1558    ) {
1559        log_err("u_strToJavaModifiedUTF8(normal) failed - %s\n", u_errorName(errorCode));
1560    }
1561    memset(dest, 0xff, sizeof(dest));
1562    errorCode=U_ZERO_ERROR;
1563    length=-5;
1564    p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL,
1565                              src, UPRV_LENGTHOF(src), &errorCode);
1566    if( U_FAILURE(errorCode) || p!=dest ||
1567        0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1568        dest[UPRV_LENGTHOF(expected)]!=0
1569    ) {
1570        log_err("u_strToJavaModifiedUTF8(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1571    }
1572    memset(dest, 0xff, sizeof(dest));
1573    errorCode=U_ZERO_ERROR;
1574    length=-5;
1575    p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected), &length,
1576                              src, UPRV_LENGTHOF(src), &errorCode);
1577    if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1578        length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1579        dest[length]!=(char)0xff
1580    ) {
1581        log_err("u_strToJavaModifiedUTF8(tight) failed - %s\n", u_errorName(errorCode));
1582    }
1583    memset(dest, 0xff, sizeof(dest));
1584    errorCode=U_ZERO_ERROR;
1585    length=-5;
1586    p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, src, -1, &errorCode);
1587    if( U_FAILURE(errorCode) || p!=dest ||
1588        length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1589        dest[length]!=0
1590    ) {
1591        log_err("u_strToJavaModifiedUTF8(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1592    }
1593    memset(dest, 0xff, sizeof(dest));
1594    errorCode=U_ZERO_ERROR;
1595    length=-5;
1596    p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, src, -1, &errorCode);
1597    if( U_FAILURE(errorCode) || p!=dest ||
1598        0!=memcmp(dest, expected, expectedTerminatedLength) ||
1599        dest[expectedTerminatedLength]!=0
1600    ) {
1601        log_err("u_strToJavaModifiedUTF8(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1602    }
1603    memset(dest, 0xff, sizeof(dest));
1604    errorCode=U_ZERO_ERROR;
1605    length=-5;
1606    p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected)/2, &length,
1607                              src, UPRV_LENGTHOF(src), &errorCode);
1608    if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1609        length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=(char)0xff
1610    ) {
1611        log_err("u_strToJavaModifiedUTF8(overflow) failed - %s\n", u_errorName(errorCode));
1612    }
1613    memset(dest, 0xff, sizeof(dest));
1614    errorCode=U_ZERO_ERROR;
1615    length=-5;
1616    p=u_strToJavaModifiedUTF8(NULL, 0, &length,
1617                              src, UPRV_LENGTHOF(src), &errorCode);
1618    if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1619        length!=UPRV_LENGTHOF(expected) || dest[0]!=(char)0xff
1620    ) {
1621        log_err("u_strToJavaModifiedUTF8(pure preflighting) failed - %s\n", u_errorName(errorCode));
1622    }
1623    memset(dest, 0xff, sizeof(dest));
1624    errorCode=U_ZERO_ERROR;
1625    length=-5;
1626    p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1627                              shortSrc, UPRV_LENGTHOF(shortSrc), &errorCode);
1628    if( U_FAILURE(errorCode) || p!=dest ||
1629        length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1630        dest[length]!=0
1631    ) {
1632        log_err("u_strToJavaModifiedUTF8(short) failed - %s\n", u_errorName(errorCode));
1633    }
1634    memset(dest, 0xff, sizeof(dest));
1635    errorCode=U_ZERO_ERROR;
1636    length=-5;
1637    p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1638                              asciiNul, -1, &errorCode);
1639    if( U_FAILURE(errorCode) || p!=dest ||
1640        length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1641        dest[length]!=0
1642    ) {
1643        log_err("u_strToJavaModifiedUTF8(asciiNul) failed - %s\n", u_errorName(errorCode));
1644    }
1645    memset(dest, 0xff, sizeof(dest));
1646    errorCode=U_ZERO_ERROR;
1647    length=-5;
1648    p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1649                              NULL, 0, &errorCode);
1650    if( U_FAILURE(errorCode) || p!=dest ||
1651        length!=0 || dest[0]!=0
1652    ) {
1653        log_err("u_strToJavaModifiedUTF8(empty) failed - %s\n", u_errorName(errorCode));
1654    }
1655
1656    /* illegal arguments */
1657    memset(dest, 0xff, sizeof(dest));
1658    errorCode=U_ZERO_ERROR;
1659    length=-5;
1660    p=u_strToJavaModifiedUTF8(NULL, sizeof(dest), &length,
1661                              src, UPRV_LENGTHOF(src), &errorCode);
1662    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1663        log_err("u_strToJavaModifiedUTF8(dest=NULL) failed - %s\n", u_errorName(errorCode));
1664    }
1665    memset(dest, 0xff, sizeof(dest));
1666    errorCode=U_ZERO_ERROR;
1667    length=-5;
1668    p=u_strToJavaModifiedUTF8(dest, -1, &length,
1669                              src, UPRV_LENGTHOF(src), &errorCode);
1670    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1671        log_err("u_strToJavaModifiedUTF8(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1672    }
1673    memset(dest, 0xff, sizeof(dest));
1674    errorCode=U_ZERO_ERROR;
1675    length=-5;
1676    p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1677                              NULL, UPRV_LENGTHOF(src), &errorCode);
1678    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1679        log_err("u_strToJavaModifiedUTF8(src=NULL) failed - %s\n", u_errorName(errorCode));
1680    }
1681    memset(dest, 0xff, sizeof(dest));
1682    errorCode=U_ZERO_ERROR;
1683    length=-5;
1684    p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1685                              NULL, -1, &errorCode);
1686    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1687        log_err("u_strToJavaModifiedUTF8(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1688    }
1689}
1690
1691static void Test_strFromJavaModifiedUTF8() {
1692    static const uint8_t src[]={
1693        0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1694        0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1695        0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1696        0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0,
1697        0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1698        0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80,  /* invalid sequences */
1699        0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1700        0xe0, 0x81, 0xac, 0xe0, 0x83, 0xad,  /* non-shortest forms are allowed */
1701        0xe0, 0xb8, 0x8e, 0x6f
1702    };
1703    static const UChar expected[]={
1704        0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1705        0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1706        0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1707        0xdbff, 0xdfff,
1708        0xfffd, 0xfffd, 0xfffd, 0xfffd,
1709        0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1710        0x6c, 0xed,
1711        0xe0e, 0x6f
1712    };
1713    static const uint8_t shortSrc[]={
1714        0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1715    };
1716    static const UChar shortExpected[]={
1717        0xe01, 0xe1, 0x61
1718    };
1719    static const uint8_t asciiNul[]={
1720        0x61, 0x62, 0x63, 0
1721    };
1722    static const UChar asciiNulExpected[]={
1723        0x61, 0x62, 0x63
1724    };
1725    static const uint8_t invalid[]={
1726        0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80
1727    };
1728    static const UChar invalidExpectedFFFD[]={
1729        0xfffd, 0xfffd, 0xfffd, 0xfffd
1730    };
1731    static const UChar invalidExpected50000[]={
1732        0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00
1733    };
1734    UChar dest[200];
1735    UChar *p;
1736    int32_t length, expectedTerminatedLength;
1737    int32_t numSubstitutions;
1738    UErrorCode errorCode;
1739
1740    expectedTerminatedLength=(int32_t)(u_strchr(expected, 0)-expected);
1741
1742    errorCode=U_ZERO_ERROR;
1743    length=numSubstitutions=-5;
1744    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1745                                       (const char *)src, UPRV_LENGTHOF(src),
1746                                       0xfffd, &numSubstitutions, &errorCode);
1747    if( U_FAILURE(errorCode) || p!=dest ||
1748        length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1749        dest[length]!=0 ||
1750        numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1751    ) {
1752        log_err("u_strFromJavaModifiedUTF8WithSub(normal) failed - %s\n", u_errorName(errorCode));
1753    }
1754    memset(dest, 0xff, sizeof(dest));
1755    errorCode=U_ZERO_ERROR;
1756    length=numSubstitutions=-5;
1757    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1758                                       (const char *)src, UPRV_LENGTHOF(src),
1759                                       0xfffd, &numSubstitutions, &errorCode);
1760    if( U_FAILURE(errorCode) || p!=dest ||
1761        0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1762        dest[UPRV_LENGTHOF(expected)]!=0 ||
1763        numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1764    ) {
1765        log_err("u_strFromJavaModifiedUTF8WithSub(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1766    }
1767    memset(dest, 0xff, sizeof(dest));
1768    errorCode=U_ZERO_ERROR;
1769    length=numSubstitutions=-5;
1770    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1771                                       (const char *)src, UPRV_LENGTHOF(src),
1772                                       0xfffd, NULL, &errorCode);
1773    if( U_FAILURE(errorCode) || p!=dest ||
1774        length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1775        dest[length]!=0
1776    ) {
1777        log_err("u_strFromJavaModifiedUTF8WithSub(normal, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1778    }
1779    memset(dest, 0xff, sizeof(dest));
1780    errorCode=U_ZERO_ERROR;
1781    length=numSubstitutions=-5;
1782    p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected), &length,
1783                                       (const char *)src, UPRV_LENGTHOF(src),
1784                                       0xfffd, &numSubstitutions, &errorCode);
1785    if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1786        length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1787        dest[length]!=0xffff ||
1788        numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1789    ) {
1790        log_err("u_strFromJavaModifiedUTF8WithSub(tight) failed - %s\n", u_errorName(errorCode));
1791    }
1792    memset(dest, 0xff, sizeof(dest));
1793    errorCode=U_ZERO_ERROR;
1794    length=numSubstitutions=-5;
1795    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1796                                       (const char *)src, -1,
1797                                       0xfffd, &numSubstitutions, &errorCode);
1798    if( U_FAILURE(errorCode) || p!=dest ||
1799        length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1800        dest[length]!=0 ||
1801        numSubstitutions!=0
1802    ) {
1803        log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1804    }
1805    memset(dest, 0xff, sizeof(dest));
1806    errorCode=U_ZERO_ERROR;
1807    length=numSubstitutions=-5;
1808    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1809                                       (const char *)src, -1,
1810                                       0xfffd, &numSubstitutions, &errorCode);
1811    if( U_FAILURE(errorCode) || p!=dest ||
1812        0!=memcmp(dest, expected, expectedTerminatedLength) ||
1813        dest[expectedTerminatedLength]!=0 ||
1814        numSubstitutions!=0
1815    ) {
1816        log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1817    }
1818    memset(dest, 0xff, sizeof(dest));
1819    errorCode=U_ZERO_ERROR;
1820    length=numSubstitutions=-5;
1821    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1822                                       (const char *)src, -1,
1823                                       0xfffd, NULL, &errorCode);
1824    if( U_FAILURE(errorCode) || p!=dest ||
1825        length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1826        dest[length]!=0
1827    ) {
1828        log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1829    }
1830    memset(dest, 0xff, sizeof(dest));
1831    errorCode=U_ZERO_ERROR;
1832    length=numSubstitutions=-5;
1833    p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected)/2, &length,
1834                                       (const char *)src, UPRV_LENGTHOF(src),
1835                                       0xfffd, &numSubstitutions, &errorCode);
1836    if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1837        length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=0xffff
1838    ) {
1839        log_err("u_strFromJavaModifiedUTF8WithSub(overflow) failed - %s\n", u_errorName(errorCode));
1840    }
1841    memset(dest, 0xff, sizeof(dest));
1842    errorCode=U_ZERO_ERROR;
1843    length=numSubstitutions=-5;
1844    p=u_strFromJavaModifiedUTF8WithSub(NULL, 0, &length,
1845                                       (const char *)src, UPRV_LENGTHOF(src),
1846                                       0xfffd, &numSubstitutions, &errorCode);
1847    if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1848        length!=UPRV_LENGTHOF(expected) || dest[0]!=0xffff
1849    ) {
1850        log_err("u_strFromJavaModifiedUTF8WithSub(pure preflighting) failed - %s\n", u_errorName(errorCode));
1851    }
1852    memset(dest, 0xff, sizeof(dest));
1853    errorCode=U_ZERO_ERROR;
1854    length=numSubstitutions=-5;
1855    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1856                                       (const char *)shortSrc, UPRV_LENGTHOF(shortSrc),
1857                                       0xfffd, &numSubstitutions, &errorCode);
1858    if( U_FAILURE(errorCode) || p!=dest ||
1859        length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1860        dest[length]!=0 ||
1861        numSubstitutions!=0
1862    ) {
1863        log_err("u_strFromJavaModifiedUTF8WithSub(short) failed - %s\n", u_errorName(errorCode));
1864    }
1865    memset(dest, 0xff, sizeof(dest));
1866    errorCode=U_ZERO_ERROR;
1867    length=numSubstitutions=-5;
1868    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1869                                       (const char *)asciiNul, -1,
1870                                       0xfffd, &numSubstitutions, &errorCode);
1871    if( U_FAILURE(errorCode) || p!=dest ||
1872        length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1873        dest[length]!=0 ||
1874        numSubstitutions!=0
1875    ) {
1876        log_err("u_strFromJavaModifiedUTF8WithSub(asciiNul) failed - %s\n", u_errorName(errorCode));
1877    }
1878    memset(dest, 0xff, sizeof(dest));
1879    errorCode=U_ZERO_ERROR;
1880    length=numSubstitutions=-5;
1881    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1882                                       NULL, 0, 0xfffd, &numSubstitutions, &errorCode);
1883    if( U_FAILURE(errorCode) || p!=dest ||
1884        length!=0 || dest[0]!=0 ||
1885        numSubstitutions!=0
1886    ) {
1887        log_err("u_strFromJavaModifiedUTF8WithSub(empty) failed - %s\n", u_errorName(errorCode));
1888    }
1889    memset(dest, 0xff, sizeof(dest));
1890    errorCode=U_ZERO_ERROR;
1891    length=numSubstitutions=-5;
1892    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1893                                       (const char *)invalid, UPRV_LENGTHOF(invalid),
1894                                       0xfffd, &numSubstitutions, &errorCode);
1895    if( U_FAILURE(errorCode) || p!=dest ||
1896        length!=UPRV_LENGTHOF(invalidExpectedFFFD) || 0!=memcmp(dest, invalidExpectedFFFD, length) ||
1897        dest[length]!=0 ||
1898        numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1899    ) {
1900        log_err("u_strFromJavaModifiedUTF8WithSub(invalid->fffd) failed - %s\n", u_errorName(errorCode));
1901    }
1902    memset(dest, 0xff, sizeof(dest));
1903    errorCode=U_ZERO_ERROR;
1904    length=numSubstitutions=-5;
1905    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1906                                       (const char *)invalid, UPRV_LENGTHOF(invalid),
1907                                       0x50000, &numSubstitutions, &errorCode);
1908    if( U_FAILURE(errorCode) || p!=dest ||
1909        length!=UPRV_LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) ||
1910        dest[length]!=0 ||
1911        numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)  /* not ...50000 */
1912    ) {
1913        log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode));
1914    }
1915    memset(dest, 0xff, sizeof(dest));
1916    errorCode=U_ZERO_ERROR;
1917    length=numSubstitutions=-5;
1918    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1919                                       (const char *)invalid, UPRV_LENGTHOF(invalid),
1920                                       U_SENTINEL, &numSubstitutions, &errorCode);
1921    if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) {
1922        log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode));
1923    }
1924    memset(dest, 0xff, sizeof(dest));
1925    errorCode=U_ZERO_ERROR;
1926    length=numSubstitutions=-5;
1927    p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1928                                       (const char *)src, UPRV_LENGTHOF(src),
1929                                       U_SENTINEL, &numSubstitutions, &errorCode);
1930    if( errorCode!=U_INVALID_CHAR_FOUND ||
1931        length>=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)-1]!=0xffff ||
1932        numSubstitutions!=0
1933    ) {
1934        log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode));
1935    }
1936
1937    /* illegal arguments */
1938    memset(dest, 0xff, sizeof(dest));
1939    errorCode=U_ZERO_ERROR;
1940    length=numSubstitutions=-5;
1941    p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length,
1942                                       (const char *)src, UPRV_LENGTHOF(src),
1943                                       0xfffd, &numSubstitutions, &errorCode);
1944    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1945        log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode));
1946    }
1947    memset(dest, 0xff, sizeof(dest));
1948    errorCode=U_ZERO_ERROR;
1949    length=numSubstitutions=-5;
1950    p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length,
1951                                       (const char *)src, UPRV_LENGTHOF(src),
1952                                       0xfffd, &numSubstitutions, &errorCode);
1953    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1954        log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1955    }
1956    memset(dest, 0xff, sizeof(dest));
1957    errorCode=U_ZERO_ERROR;
1958    length=numSubstitutions=-5;
1959    p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1960                                       NULL, UPRV_LENGTHOF(src),
1961                                       0xfffd, &numSubstitutions, &errorCode);
1962    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1963        log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode));
1964    }
1965    memset(dest, 0xff, sizeof(dest));
1966    errorCode=U_ZERO_ERROR;
1967    length=numSubstitutions=-5;
1968    p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1969                                       NULL, -1, 0xfffd, &numSubstitutions, &errorCode);
1970    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1971        log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1972    }
1973    memset(dest, 0xff, sizeof(dest));
1974    errorCode=U_ZERO_ERROR;
1975    length=numSubstitutions=-5;
1976    p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1977                                       (const char *)src, UPRV_LENGTHOF(src),
1978                                       0x110000, &numSubstitutions, &errorCode);
1979    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1980        log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode));
1981    }
1982    memset(dest, 0xff, sizeof(dest));
1983    errorCode=U_ZERO_ERROR;
1984    length=numSubstitutions=-5;
1985    p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1986                                       (const char *)src, UPRV_LENGTHOF(src),
1987                                       0xdfff, &numSubstitutions, &errorCode);
1988    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1989        log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode));
1990    }
1991}
1992
1993/* test that string transformation functions permit NULL source pointer when source length==0 */
1994static void TestNullEmptySource() {
1995    char dest8[4]={ 3, 3, 3, 3 };
1996    UChar dest16[4]={ 3, 3, 3, 3 };
1997    UChar32 dest32[4]={ 3, 3, 3, 3 };
1998#if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1999    wchar_t destW[4]={ 3, 3, 3, 3 };
2000#endif
2001
2002    int32_t length;
2003    UErrorCode errorCode;
2004
2005    /* u_strFromXyz() */
2006
2007    dest16[0]=3;
2008    length=3;
2009    errorCode=U_ZERO_ERROR;
2010    u_strFromUTF8(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2011    if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2012        log_err("u_strFromUTF8(source=NULL, sourceLength=0) failed\n");
2013    }
2014
2015    dest16[0]=3;
2016    length=3;
2017    errorCode=U_ZERO_ERROR;
2018    u_strFromUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2019    if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2020        log_err("u_strFromUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2021    }
2022
2023    dest16[0]=3;
2024    length=3;
2025    errorCode=U_ZERO_ERROR;
2026    u_strFromUTF8Lenient(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2027    if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2028        log_err("u_strFromUTF8Lenient(source=NULL, sourceLength=0) failed\n");
2029    }
2030
2031    dest16[0]=3;
2032    length=3;
2033    errorCode=U_ZERO_ERROR;
2034    u_strFromUTF32(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2035    if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2036        log_err("u_strFromUTF32(source=NULL, sourceLength=0) failed\n");
2037    }
2038
2039    dest16[0]=3;
2040    length=3;
2041    errorCode=U_ZERO_ERROR;
2042    u_strFromUTF32WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2043    if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2044        log_err("u_strFromUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2045    }
2046
2047    dest16[0]=3;
2048    length=3;
2049    errorCode=U_ZERO_ERROR;
2050    u_strFromJavaModifiedUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2051    if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2052        log_err("u_strFromJavaModifiedUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2053    }
2054
2055    /* u_strToXyz() */
2056
2057    dest8[0]=3;
2058    length=3;
2059    errorCode=U_ZERO_ERROR;
2060    u_strToUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2061    if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2062        log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2063    }
2064
2065    dest8[0]=3;
2066    length=3;
2067    errorCode=U_ZERO_ERROR;
2068    u_strToUTF8WithSub(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2069    if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2070        log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2071    }
2072
2073    dest32[0]=3;
2074    length=3;
2075    errorCode=U_ZERO_ERROR;
2076    u_strToUTF32(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, &errorCode);
2077    if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2078        log_err("u_strToUTF32(source=NULL, sourceLength=0) failed\n");
2079    }
2080
2081    dest32[0]=3;
2082    length=3;
2083    errorCode=U_ZERO_ERROR;
2084    u_strToUTF32WithSub(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2085    if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2086        log_err("u_strToUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2087    }
2088
2089    dest8[0]=3;
2090    length=3;
2091    errorCode=U_ZERO_ERROR;
2092    u_strToJavaModifiedUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2093    if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2094        log_err("u_strToJavaModifiedUTF8(source=NULL, sourceLength=0) failed\n");
2095    }
2096
2097#if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
2098
2099    dest16[0]=3;
2100    length=3;
2101    errorCode=U_ZERO_ERROR;
2102    u_strFromWCS(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2103    if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2104        log_err("u_strFromWCS(source=NULL, sourceLength=0) failed\n");
2105    }
2106
2107    destW[0]=3;
2108    length=3;
2109    errorCode=U_ZERO_ERROR;
2110    u_strToWCS(destW, UPRV_LENGTHOF(destW), &length, NULL, 0, &errorCode);
2111    if(errorCode!=U_ZERO_ERROR || length!=0 || destW[0]!=0 || destW[1]!=3) {
2112        log_err("u_strToWCS(source=NULL, sourceLength=0) failed\n");
2113    }
2114
2115#endif
2116}
2117