1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*****************************************************************************
7*
8* File CCONVTST.C
9*
10* Modification History:
11*        Name                     Description
12*   Madhu Katragadda              7/7/2000        Converter Tests for extended code coverage
13******************************************************************************
14*/
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include "unicode/uloc.h"
19#include "unicode/ucnv.h"
20#include "unicode/utypes.h"
21#include "unicode/ustring.h"
22#include "unicode/uset.h"
23#include "cintltst.h"
24
25#define MAX_LENGTH 999
26
27#define UNICODE_LIMIT 0x10FFFF
28#define SURROGATE_HIGH_START    0xD800
29#define SURROGATE_LOW_END       0xDFFF
30
31static int32_t  gInBufferSize = 0;
32static int32_t  gOutBufferSize = 0;
33static char     gNuConvTestName[1024];
34
/* Smaller of two values. Every argument and the whole expansion are
   parenthesized so that operands containing low-precedence operators
   (e.g. nct_min(a, b | c)) are compared as written. Arguments are still
   evaluated more than once, so do not pass expressions with side effects. */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))
/* Number of elements in a true array (not a pointer), as an int32_t. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
37
38static void printSeq(const unsigned char* a, int len);
39static void printSeqErr(const unsigned char* a, int len);
40static void printUSeq(const UChar* a, int len);
41static void printUSeqErr(const UChar* a, int len);
42static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
43                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
44static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
45               const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
46
47static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
48                const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset);
49static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
50               const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset);
51
52static void setNuConvTestName(const char *codepage, const char *direction)
53{
54    sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
55        codepage,
56        direction,
57        (int)gInBufferSize,
58        (int)gOutBufferSize);
59}
60
61
62static void TestSurrogateBehaviour(void);
63static void TestErrorBehaviour(void);
64
65#if !UCONFIG_NO_LEGACY_CONVERSION
66static void TestToUnicodeErrorBehaviour(void);
67static void TestGetNextErrorBehaviour(void);
68#endif
69
70static void TestRegressionUTF8(void);
71static void TestRegressionUTF32(void);
72static void TestAvailableConverters(void);
73static void TestFlushInternalBuffer(void);  /*for improved code coverage in ucnv_cnv.c*/
74static void TestResetBehaviour(void);
75static void TestTruncated(void);
76static void TestUnicodeSet(void);
77
78static void TestWithBufferSize(int32_t osize, int32_t isize);
79
80
/**
 * Writes a byte sequence to the verbose log as "{0xNN 0xNN ... }".
 *
 * @param a    Bytes to print.
 * @param len  Number of bytes to print from a.
 */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X ", a[idx]);
    }
    log_verbose("}\n");
}
89
90static void printUSeq(const UChar* a, int len)
91{
92    int i=0;
93    log_verbose("\n{");
94    while (i<len)
95        log_verbose("%0x04X ", a[i++]);
96    log_verbose("}\n");
97}
98
/**
 * Writes a byte sequence to stderr as "{0xNN 0xNN ... }".
 *
 * @param a    Bytes to print.
 * @param len  Number of bytes to print from a.
 */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "\n{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02X ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
106
107static void printUSeqErr(const UChar* a, int len)
108{
109    int i=0;
110    fprintf(stderr, "\n{");
111    while (i<len)
112        fprintf(stderr, "0x%04X ", a[i++]);
113    fprintf(stderr,"}\n");
114}
115
116void addExtraTests(TestNode** root);
117
118void addExtraTests(TestNode** root)
119{
120     addTest(root, &TestSurrogateBehaviour,         "tsconv/ncnvtst/TestSurrogateBehaviour");
121     addTest(root, &TestErrorBehaviour,             "tsconv/ncnvtst/TestErrorBehaviour");
122
123#if !UCONFIG_NO_LEGACY_CONVERSION
124     addTest(root, &TestToUnicodeErrorBehaviour,    "tsconv/ncnvtst/ToUnicodeErrorBehaviour");
125     addTest(root, &TestGetNextErrorBehaviour,      "tsconv/ncnvtst/TestGetNextErrorBehaviour");
126#endif
127
128     addTest(root, &TestAvailableConverters,        "tsconv/ncnvtst/TestAvailableConverters");
129     addTest(root, &TestFlushInternalBuffer,        "tsconv/ncnvtst/TestFlushInternalBuffer");
130     addTest(root, &TestResetBehaviour,             "tsconv/ncnvtst/TestResetBehaviour");
131     addTest(root, &TestRegressionUTF8,             "tsconv/ncnvtst/TestRegressionUTF8");
132     addTest(root, &TestRegressionUTF32,            "tsconv/ncnvtst/TestRegressionUTF32");
133     addTest(root, &TestTruncated,                  "tsconv/ncnvtst/TestTruncated");
134     addTest(root, &TestUnicodeSet,                 "tsconv/ncnvtst/TestUnicodeSet");
135}
136
137/*test surrogate behaviour*/
138static void TestSurrogateBehaviour(){
139    log_verbose("Testing for SBCS and LATIN_1\n");
140    {
141        UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032};
142        const uint8_t expected[] = {0x31, 0x1a, 0x32};
143
144#if !UCONFIG_NO_LEGACY_CONVERSION
145        /*SBCS*/
146        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
147                expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR))
148            log_err("u-> ibm-920 [UCNV_SBCS] not match.\n");
149#endif
150
151        /*LATIN_1*/
152        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
153                expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR ))
154            log_err("u-> LATIN_1 not match.\n");
155
156    }
157
158#if !UCONFIG_NO_LEGACY_CONVERSION
159    log_verbose("Testing for DBCS and MBCS\n");
160    {
161        UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
162        const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
163        int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
164
165        /*DBCS*/
166        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
167                expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
168            log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
169        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
170                expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR))
171            log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
172        /*MBCS*/
173        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
174                expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
175            log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
176        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
177                expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR))
178            log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
179    }
180
181    log_verbose("Testing for ISO-2022-jp\n");
182    {
183        UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
184
185        const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
186                                    0x31,0x1A, 0x32};
187
188
189        int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
190
191        // iso-2022-jp  android-change
192        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
193                expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR))
194            log_err("u-> not match.\n");
195        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
196                expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR))
197            log_err("u->  not match.\n");
198    }
199
200   /* BEGIN android-removed */
201   /* To save space, Android does not build full ISO-2022-CN tables.
202      We skip the tests for ISO-2022-CN. */
203   /*
204    log_verbose("Testing for ISO-2022-cn\n");
205    {
206        static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
207
208        static const uint8_t expected[] = {
209                                    0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
210                                    0x36, 0x21,
211                                    0x0F, 0x31,
212                                    0x1A,
213                                    0x32
214                                    };
215
216
217
218        static const int32_t offsets[] = {
219                                    0,    0,    0,    0,    0,    0,    0,
220                                    1,    1,
221                                    2,    2,
222                                    3,
223                                    5,  };
224
225        // iso-2022-CN  android-change
226        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
227                expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR))
228            log_err("u-> not match.\n");
229        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
230                expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR))
231            log_err("u-> not match.\n");
232    }
233    */
234    /* END android-removed */
235
236        log_verbose("Testing for ISO-2022-kr\n");
237    {
238        static const UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
239
240        static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
241                                    0x0E, 0x6C, 0x69,
242                                    0x0f, 0x1A,
243                                    0x0e, 0x6F, 0x4B,
244                                    0x0F, 0x31,
245                                    0x1A,
246                                    0x32 };
247
248        static const int32_t offsets[] = {-1, -1, -1, -1,
249                              0, 0, 0,
250                              1, 1,
251                              3, 3, 3,
252                              4, 4,
253                              5,
254                              7,
255                            };
256
257        // iso-2022-kr  android-change
258        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
259                expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR))
260            log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
261        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
262                expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR))
263            log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
264    }
265
266        log_verbose("Testing for HZ\n");
267    {
268        static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
269
270        static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
271                                    0x7E, 0x7D, 0x1A,
272                                    0x7E, 0x7B, 0x36, 0x21,
273                                    0x7E, 0x7D, 0x31,
274                                    0x1A,
275                                    0x32 };
276
277
278        static const int32_t offsets[] = {0,0,0,0,
279                             1,1,1,
280                             3,3,3,3,
281                             4,4,4,
282                             5,
283                             7,};
284
285        /*hz*/
286        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
287                expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR))
288            log_err("u-> HZ not match.\n");
289        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
290                expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR))
291            log_err("u-> HZ not match.\n");
292    }
293#endif
294
295    /*UTF-8*/
296     log_verbose("Testing for UTF8\n");
297    {
298        static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
299        static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
300                           0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
301                           0x04, 0x06 };
302        static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
303            0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
304
305
306        static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
307        /*UTF-8*/
308        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
309            expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR ))
310            log_err("u-> UTF8 with offsets and flush true did not match.\n");
311        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
312            expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR ))
313            log_err("u-> UTF8 with offsets and flush true did not match.\n");
314        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
315            expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR ))
316            log_err("u-> UTF8 with offsets and flush true did not match.\n");
317        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
318            expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR ))
319            log_err("u-> UTF8 with offsets and flush true did not match.\n");
320
321        if(!convertToU(expected, sizeof(expected),
322            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR ))
323            log_err("UTF8 -> u did not match.\n");
324        if(!convertToU(expected, sizeof(expected),
325            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR ))
326            log_err("UTF8 -> u did not match.\n");
327        if(!convertToU(expected, sizeof(expected),
328            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR ))
329            log_err("UTF8 ->u  did not match.\n");
330        if(!convertToU(expected, sizeof(expected),
331            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR ))
332            log_err("UTF8 -> u did not match.\n");
333
334    }
335}
336
337/*test various error behaviours*/
338static void TestErrorBehaviour(){
339    log_verbose("Testing for SBCS and LATIN_1\n");
340    {
341        static const UChar    sampleText[] =   { 0x0031, 0xd801};
342        static const UChar    sampleText2[] =   { 0x0031, 0xd801, 0x0032};
343        static const uint8_t expected0[] =          { 0x31};
344        static const uint8_t expected[] =          { 0x31, 0x1a};
345        static const uint8_t expected2[] =         { 0x31, 0x1a, 0x32};
346
347#if !UCONFIG_NO_LEGACY_CONVERSION
348        /*SBCS*/
349        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
350                expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR))
351            log_err("u-> ibm-920 [UCNV_SBCS] \n");
352        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
353                expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR))
354            log_err("u-> ibm-920 [UCNV_SBCS] \n");
355        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
356                expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR))
357            log_err("u-> ibm-920 [UCNV_SBCS] did not match\n");
358#endif
359
360        /*LATIN_1*/
361        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
362                expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
363            log_err("u-> LATIN_1 is supposed to fail\n");
364        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
365                expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR))
366            log_err("u-> LATIN_1 is supposed to fail\n");
367
368        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
369                expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
370            log_err("u-> LATIN_1 did not match\n");
371    }
372
373#if !UCONFIG_NO_LEGACY_CONVERSION
374    log_verbose("Testing for DBCS and MBCS\n");
375    {
376        static const UChar    sampleText[]    = { 0x00a1, 0xd801};
377        static const uint8_t expected[] = { 0xa2, 0xae};
378        static const int32_t offsets[]        = { 0x00, 0x00};
379        static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0};
380        static const int32_t offsetsSUB[]        = { 0x00, 0x00, 0x01, 0x01};
381
382        static const UChar       sampleText2[] = { 0x00a1, 0xd801, 0x00a4};
383        static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
384        static const int32_t offsets2[]        = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02};
385
386        static const UChar       sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01};
387        static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0};
388        static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x01, 0x02, 0x02};
389
390        static const UChar       sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01};
391        static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe};
392        static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 };
393
394        /*DBCS*/
395        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
396                expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
397            log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
398        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
399                expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
400            log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
401
402        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
403                expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR))
404            log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
405        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
406                expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
407            log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
408
409
410        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
411                expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
412            log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
413        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
414                expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR))
415            log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
416
417        /*MBCS*/
418        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
419                expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
420            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
421        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
422                expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
423            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
424
425        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
426                expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
427            log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
428        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
429                expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
430            log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
431        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
432                expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR))
433            log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
434
435        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
436                expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR))
437            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
438        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
439                expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR))
440            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
441
442        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
443                expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, TRUE, U_ZERO_ERROR))
444            log_err("u-> euc-jp [UCNV_MBCS] \n");
445        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
446                expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, FALSE, U_ZERO_ERROR))
447            log_err("u-> euc-jp [UCNV_MBCS] \n");
448    }
449
450    // iso-2022-jp
451    log_verbose("Testing for iso-2022-jp\n");
452    {
453        static const UChar    sampleText[]    = { 0x0031, 0xd801};
454        static const uint8_t expected[] = {  0x31};
455        static const uint8_t expectedSUB[] = {  0x31, 0x1a};
456        static const int32_t offsets[]        = { 0x00, 1};
457
458        static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
459        static const uint8_t expected2[] = {  0x31,0x1A,0x32};
460        static const int32_t offsets2[]        = { 0x00,0x01,0x02};
461
462        static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
463        static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a};
464        static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 };
465        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
466                expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR))
467            log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
468        // Google Patch: Change expected result code from U_AMBIGUOUS_ALIAS_WARNING to U_ZERO_ERROR.
469        //               Introduced with ICU 51.1.
470        //               Markus says this warning can occur when the set of available converters is changed,
471        //               and that it's not worth looking into in further detail.
472        //               Note: public ICU was U_ZERO_ERROR prior to ICU 51.
473        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
474                expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_ZERO_ERROR))
475            log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
476        // End of Google Patch.
477
478        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
479                expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR))
480            log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n");
481        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
482                expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
483            log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
484        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
485                expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
486            log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
487
488        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
489                expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
490            log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
491        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
492                expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
493            log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
494    }
495
496    /* BEGIN android-removed */
497    /* To save space, Android does not build full ISO-2022-CN tables.
498       We skip the tests for ISO-2022-CN. */
499    /*
500    // iso-2022-cn  android-change
501    log_verbose("Testing for iso-2022-cn\n");
502    {
503        static const UChar    sampleText[]    = { 0x0031, 0xd801};
504        static const uint8_t expected[] = { 0x31};
505        static const uint8_t expectedSUB[] = { 0x31, 0x1A};
506        static const int32_t offsets[]        = { 0x00, 1};
507
508        static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
509        static const uint8_t expected2[] = { 0x31, 0x1A,0x32};
510        static const int32_t offsets2[]        = { 0x00, 0x01,0x02};
511
512        static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
513        static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A};
514        static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x02 };
515
516        static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
517        static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a };
518        static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 };
519        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
520                expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR))
521            log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
522        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
523                expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR))
524            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
525
526        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
527                expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR))
528            log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n");
529        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
530                expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
531            log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
532        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
533                expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
534            log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
535
536        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
537                expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR))
538            log_err("u->iso-2022-cn [UCNV_MBCS] \n");
539        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
540                expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR))
541            log_err("u-> iso-2022-cn[UCNV_MBCS] \n");
542
543        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
544                expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR))
545            log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
546        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
547                expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR))
548            log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
549    }
550    */
551    /* END android-removed */
552
553    // iso-2022-kr  android-change
554    log_verbose("Testing for iso-2022-kr\n");
555    {
556        static const UChar    sampleText[]    = { 0x0031, 0xd801};
557        static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31};
558        static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A};
559        static const int32_t offsets[]        = { -1,   -1,   -1,   -1,   0x00, 1};
560
561        static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
562        static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32};
563        static const int32_t offsets2[]        = { -1,   -1,   -1,   -1,   0x00, 0x01, 0x02};
564
565        static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
566        static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43,  0x51, 0x50, 0x1A };
567        static const int32_t offsets3MBCS[]        = { -1,   -1,   -1,   -1,    0x00, 0x01, 0x02, 0x02 };
568
569        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
570                expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR))
571            log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
572        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
573                expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR))
574            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
575
576        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
577                expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR))
578            log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n");
579        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
580                expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
581            log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
582        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
583                expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
584            log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
585
586        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
587                expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR))
588            log_err("u->iso-2022-kr [UCNV_MBCS] \n");
589        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
590                expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR))
591            log_err("u-> iso-2022-kr[UCNV_MBCS] \n");
592    }
593
594    /*HZ*/
595    log_verbose("Testing for HZ\n");
596    {
597        static const UChar    sampleText[]    = { 0x0031, 0xd801};
598        static const uint8_t expected[] = { 0x7e, 0x7d, 0x31};
599        static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A};
600        static const int32_t offsets[]        = { 0x00, 0x00, 0x00, 1};
601
602        static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
603        static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31,  0x1A,  0x32 };
604        static const int32_t offsets2[]        = { 0x00, 0x00, 0x00, 0x01,  0x02 };
605
606        static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
607        static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50,  0x1A };
608        static const int32_t offsets3MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x02};
609
610        static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
611        static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a };
612        static const int32_t offsets4MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 };
613        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
614                expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR))
615            log_err("u-> HZ [UCNV_MBCS] \n");
616        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
617                expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR))
618            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
619
620        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
621                expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR))
622            log_err("u->HZ[UCNV_DBCS] did not match\n");
623        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
624                expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
625            log_err("u-> HZ [UCNV_DBCS] did not match\n");
626        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
627                expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
628            log_err("u-> HZ [UCNV_DBCS] did not match\n");
629
630        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
631                expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR))
632            log_err("u->HZ [UCNV_MBCS] \n");
633        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
634                expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR))
635            log_err("u-> HZ[UCNV_MBCS] \n");
636
637        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
638                expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR))
639            log_err("u-> HZ [UCNV_MBCS] \n");
640        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
641                expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR))
642            log_err("u-> HZ [UCNV_MBCS] \n");
643    }
644#endif
645}
646
647#if !UCONFIG_NO_LEGACY_CONVERSION
648/*test different convertToUnicode error behaviours*/
649static void TestToUnicodeErrorBehaviour()
650{
651    log_verbose("Testing error conditions for DBCS\n");
652    {
653        uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04};
654        const UChar expected[] = { 0x00a1 };
655
656        if(!convertToU(sampleText, sizeof(sampleText),
657                expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_AMBIGUOUS_ALIAS_WARNING ))
658            log_err("DBCS (ibm-1363)->Unicode  did not match.\n");
659        if(!convertToU(sampleText, sizeof(sampleText),
660                expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING ))
661            log_err("DBCS (ibm-1363)->Unicode  with flush = false did not match.\n");
662    }
663    log_verbose("Testing error conditions for SBCS\n");
664    {
665        uint8_t sampleText[] = { 0xa2, 0xFF};
666        const UChar expected[] = { 0x00c2 };
667
668      /*  uint8_t sampleText2[] = { 0xa2, 0x70 };
669        const UChar expected2[] = { 0x0073 };*/
670
671        if(!convertToU(sampleText, sizeof(sampleText),
672                expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR ))
673            log_err("SBCS (ibm-1051)->Unicode  did not match.\n");
674        if(!convertToU(sampleText, sizeof(sampleText),
675                expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR ))
676            log_err("SBCS (ibm-1051)->Unicode  with flush = false did not match.\n");
677
678    }
679}
680
681static void TestGetNextErrorBehaviour(){
682   /*Test for unassigned character*/
683#define INPUT_SIZE 1
684    static const char input1[INPUT_SIZE]={ 0x70 };
685    const char* source=(const char*)input1;
686    UErrorCode err=U_ZERO_ERROR;
687    UChar32 c=0;
688    UConverter *cnv=ucnv_open("ibm-424", &err);
689    if(U_FAILURE(err)) {
690        log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err));
691        return;
692    }
693    c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err);
694    if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
695        log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n",  myErrorName(err), c);
696    }
697    ucnv_close(cnv);
698}
699#endif
700
701#define MAX_UTF16_LEN 2
702#define MAX_UTF8_LEN 4
703
704/*Regression test for utf8 converter*/
705static void TestRegressionUTF8(){
706    UChar32 currCh = 0;
707    int32_t offset8;
708    int32_t offset16;
709    UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
710    uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH);
711
712    while (currCh <= UNICODE_LIMIT) {
713        offset16 = 0;
714        offset8 = 0;
715        while(currCh <= UNICODE_LIMIT
716            && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
717            && offset8 < (MAX_LENGTH - MAX_UTF8_LEN))
718        {
719            if (currCh == SURROGATE_HIGH_START) {
720                currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
721            }
722            UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
723            UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh);
724            currCh++;
725        }
726        if(!convertFromU(standardForm, offset16,
727            utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
728            log_err("Unicode->UTF8 did not match.\n");
729        }
730        if(!convertToU(utf8, offset8,
731            standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
732            log_err("UTF8->Unicode did not match.\n");
733        }
734    }
735
736    free(standardForm);
737    free(utf8);
738
739    {
740        static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 };
741        static const UChar expected[] = { 0x0301, 0x0300 };
742        UConverter *conv8;
743        UErrorCode err = U_ZERO_ERROR;
744        UChar pivotBuffer[100];
745        const UChar* const pivEnd = pivotBuffer + 100;
746        const char* srcBeg;
747        const char* srcEnd;
748        UChar* pivBeg;
749
750        conv8 = ucnv_open("UTF-8", &err);
751
752        srcBeg = src8;
753        pivBeg = pivotBuffer;
754        srcEnd = src8 + 3;
755        ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
756        if (srcBeg != srcEnd) {
757            log_err("Did not consume whole buffer on first call.\n");
758        }
759
760        srcEnd = src8 + 4;
761        ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
762        if (srcBeg != srcEnd) {
763            log_err("Did not consume whole buffer on second call.\n");
764        }
765
766        if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
767            log_err("Did not get expected results for UTF-8.\n");
768        }
769        ucnv_close(conv8);
770    }
771}
772
773#define MAX_UTF32_LEN 1
774
775static void TestRegressionUTF32(){
776    UChar32 currCh = 0;
777    int32_t offset32;
778    int32_t offset16;
779    UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
780    UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32));
781
782    while (currCh <= UNICODE_LIMIT) {
783        offset16 = 0;
784        offset32 = 0;
785        while(currCh <= UNICODE_LIMIT
786            && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
787            && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN))
788        {
789            if (currCh == SURROGATE_HIGH_START) {
790                currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
791            }
792            UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
793            UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh);
794            currCh++;
795        }
796        if(!convertFromU(standardForm, offset16,
797            (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
798            log_err("Unicode->UTF32 did not match.\n");
799        }
800        if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32),
801            standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
802            log_err("UTF32->Unicode did not match.\n");
803        }
804    }
805    free(standardForm);
806    free(utf32);
807
808    {
809        /* Check for lone surrogate error handling. */
810        static const UChar   sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 };
811        static const UChar   sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 };
812        static const uint8_t expectedUTF32BE[] = {
813            0x00, 0x00, 0x00, 0x31,
814            0x00, 0x00, 0xff, 0xfd,
815            0x00, 0x00, 0x00, 0x32
816        };
817        static const uint8_t expectedUTF32LE[] = {
818            0x31, 0x00, 0x00, 0x00,
819            0xfd, 0xff, 0x00, 0x00,
820            0x32, 0x00, 0x00, 0x00
821        };
822        static const int32_t offsetsUTF32[] = {
823            0x00, 0x00, 0x00, 0x00,
824            0x01, 0x01, 0x01, 0x01,
825            0x02, 0x02, 0x02, 0x02
826        };
827
828        if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]),
829                expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
830            log_err("u->UTF-32BE\n");
831        if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]),
832                expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
833            log_err("u->UTF-32BE\n");
834
835        if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]),
836                expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
837            log_err("u->UTF-32LE\n");
838        if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]),
839                expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
840            log_err("u->UTF-32LE\n");
841    }
842
843    {
844        static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 };
845        static const UChar expected[] = { 0x0031, 0x0030 };
846        UConverter *convBE;
847        UErrorCode err = U_ZERO_ERROR;
848        UChar pivotBuffer[100];
849        const UChar* const pivEnd = pivotBuffer + 100;
850        const char* srcBeg;
851        const char* srcEnd;
852        UChar* pivBeg;
853
854        convBE = ucnv_open("UTF-32BE", &err);
855
856        srcBeg = srcBE;
857        pivBeg = pivotBuffer;
858        srcEnd = srcBE + 5;
859        ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
860        if (srcBeg != srcEnd) {
861            log_err("Did not consume whole buffer on first call.\n");
862        }
863
864        srcEnd = srcBE + 8;
865        ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
866        if (srcBeg != srcEnd) {
867            log_err("Did not consume whole buffer on second call.\n");
868        }
869
870        if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
871            log_err("Did not get expected results for UTF-32BE.\n");
872        }
873        ucnv_close(convBE);
874    }
875    {
876        static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 };
877        static const UChar expected[] = { 0x0031, 0x0030 };
878        UConverter *convLE;
879        UErrorCode err = U_ZERO_ERROR;
880        UChar pivotBuffer[100];
881        const UChar* const pivEnd = pivotBuffer + 100;
882        const char* srcBeg;
883        const char* srcEnd;
884        UChar* pivBeg;
885
886        convLE = ucnv_open("UTF-32LE", &err);
887
888        srcBeg = srcLE;
889        pivBeg = pivotBuffer;
890        srcEnd = srcLE + 5;
891        ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
892        if (srcBeg != srcEnd) {
893            log_err("Did not consume whole buffer on first call.\n");
894        }
895
896        srcEnd = srcLE + 8;
897        ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
898        if (srcBeg != srcEnd) {
899            log_err("Did not consume whole buffer on second call.\n");
900        }
901
902        if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
903            log_err("Did not get expected results for UTF-32LE.\n");
904        }
905        ucnv_close(convLE);
906    }
907}
908
909/*Walk through the available converters*/
910static void TestAvailableConverters(){
911    UErrorCode status=U_ZERO_ERROR;
912    UConverter *conv=NULL;
913    int32_t i=0;
914    for(i=0; i < ucnv_countAvailable(); i++){
915        status=U_ZERO_ERROR;
916        conv=ucnv_open(ucnv_getAvailableName(i), &status);
917        if(U_FAILURE(status)){
918            log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n",
919                        ucnv_getAvailableName(i), myErrorName(status));
920            continue;
921        }
922        ucnv_close(conv);
923    }
924
925}
926
927static void TestFlushInternalBuffer(){
928    TestWithBufferSize(MAX_LENGTH, 1);
929    TestWithBufferSize(1, 1);
930    TestWithBufferSize(1, MAX_LENGTH);
931    TestWithBufferSize(MAX_LENGTH, MAX_LENGTH);
932}
933
934static void TestWithBufferSize(int32_t insize, int32_t outsize){
935
936    gInBufferSize =insize;
937    gOutBufferSize = outsize;
938
939     log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
940    {
941        UChar    sampleText[] =
942            { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09,  0x002E  };
943        const uint8_t expectedUTF8[] =
944            { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
945        int32_t  toUTF8Offs[] =
946            { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
947       /* int32_t fmUTF8Offs[] =
948            { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/
949
950        /*UTF-8*/
951        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
952            expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE))
953             log_err("u-> UTF8 did not match.\n");
954    }
955
956#if !UCONFIG_NO_LEGACY_CONVERSION
957     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
958    {
959        UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
960        const uint8_t toIBM943[]= { 0x61,
961            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
962            0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
963            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
964            0x61 };
965        int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
966
967        if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
968                toIBM943, sizeof(toIBM943), "ibm-943",
969                (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE))
970            log_err("u-> ibm-943 with subst with value did not match.\n");
971    }
972#endif
973
974     log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
975    {
976        const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
977            0xe0, 0x80,  0x61};
978        UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0x0061};
979        int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0006};
980
981        if(!testConvertToU(sampleText1, sizeof(sampleText1),
982                 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE))
983            log_err("utf8->u with substitute did not match.\n");;
984    }
985
986#if !UCONFIG_NO_LEGACY_CONVERSION
987    log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
988    /*to Unicode*/
989    {
990        const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
991            0x81, 0xad, /*unassigned*/
992            0x89, 0xd3 };
993        UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
994            0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
995            0x7B87};
996        int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
997
998        if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU),
999                 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
1000                (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE))
1001            log_err("ibm-943->u with substitute with value did not match.\n");
1002
1003    }
1004#endif
1005}
1006
1007static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
1008                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
1009{
1010
1011    int32_t i=0;
1012    char *p=0;
1013    const UChar *src;
1014    char buffer[MAX_LENGTH];
1015    int32_t offsetBuffer[MAX_LENGTH];
1016    int32_t *offs=0;
1017    char *targ;
1018    char *targetLimit;
1019    UChar *sourceLimit=0;
1020    UErrorCode status = U_ZERO_ERROR;
1021    UConverter *conv = 0;
1022    conv = ucnv_open(codepage, &status);
1023    if(U_FAILURE(status))
1024    {
1025        log_data_err("Couldn't open converter %s\n",codepage);
1026        return TRUE;
1027    }
1028    log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
1029
1030    for(i=0; i<MAX_LENGTH; i++){
1031        buffer[i]=(char)0xF0;
1032        offsetBuffer[i]=0xFF;
1033    }
1034
1035    src=source;
1036    sourceLimit=(UChar*)src+(sourceLen);
1037    targ=buffer;
1038    targetLimit=targ+MAX_LENGTH;
1039    offs=offsetBuffer;
1040    ucnv_fromUnicode (conv,
1041                  (char **)&targ,
1042                  (const char *)targetLimit,
1043                  &src,
1044                  sourceLimit,
1045                  expectOffsets ? offs : NULL,
1046                  doFlush,
1047                  &status);
1048    ucnv_close(conv);
1049    if(status != expectedStatus){
1050          log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
1051          return FALSE;
1052    }
1053
1054    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1055        sourceLen, targ-buffer);
1056
1057    if(expectLen != targ-buffer)
1058    {
1059        log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
1060        log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
1061        printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer));
1062        printSeqErr((const unsigned char*)expect, expectLen);
1063        return FALSE;
1064    }
1065
1066    if(memcmp(buffer, expect, expectLen)){
1067        log_err("String does not match. FROM Unicode to codePage%s\n", codepage);
1068        log_info("\nGot:");
1069        printSeqErr((const unsigned char *)buffer, expectLen);
1070        log_info("\nExpected:");
1071        printSeqErr((const unsigned char *)expect, expectLen);
1072        return FALSE;
1073    }
1074    else {
1075        log_verbose("Matches!\n");
1076    }
1077
1078    if (expectOffsets != 0){
1079        log_verbose("comparing %d offsets..\n", targ-buffer);
1080        if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){
1081            log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage);
1082            log_info("\nGot  : ");
1083            printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer));
1084            for(p=buffer;p<targ;p++)
1085                log_info("%d, ", offsetBuffer[p-buffer]);
1086            log_info("\nExpected: ");
1087            for(i=0; i< (targ-buffer); i++)
1088                log_info("%d,", expectOffsets[i]);
1089        }
1090    }
1091
1092    return TRUE;
1093}
1094
1095
1096static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
1097               const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
1098{
1099    UErrorCode status = U_ZERO_ERROR;
1100    UConverter *conv = 0;
1101    int32_t i=0;
1102    UChar *p=0;
1103    const char* src;
1104    UChar buffer[MAX_LENGTH];
1105    int32_t offsetBuffer[MAX_LENGTH];
1106    int32_t *offs=0;
1107    UChar *targ;
1108    UChar *targetLimit;
1109    uint8_t *sourceLimit=0;
1110
1111
1112
1113    conv = ucnv_open(codepage, &status);
1114    if(U_FAILURE(status))
1115    {
1116        log_data_err("Couldn't open converter %s\n",codepage);
1117        return TRUE;
1118    }
1119    log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
1120
1121
1122
1123    for(i=0; i<MAX_LENGTH; i++){
1124        buffer[i]=0xFFFE;
1125        offsetBuffer[i]=-1;
1126    }
1127
1128    src=(const char *)source;
1129    sourceLimit=(uint8_t*)(src+(sourceLen));
1130    targ=buffer;
1131    targetLimit=targ+MAX_LENGTH;
1132    offs=offsetBuffer;
1133
1134
1135
1136    ucnv_toUnicode (conv,
1137                &targ,
1138                targetLimit,
1139                (const char **)&src,
1140                (const char *)sourceLimit,
1141                expectOffsets ? offs : NULL,
1142                doFlush,
1143                &status);
1144
1145    ucnv_close(conv);
1146    if(status != expectedStatus){
1147          log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
1148          return FALSE;
1149    }
1150    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1151        sourceLen, targ-buffer);
1152
1153
1154
1155
1156    log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2);
1157
1158    if (expectOffsets != 0) {
1159        if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){
1160
1161            log_err("did not get the expected offsets from %s To UNICODE\n", codepage);
1162            log_info("\nGot : ");
1163            for(p=buffer;p<targ;p++)
1164                log_info("%d, ", offsetBuffer[p-buffer]);
1165            log_info("\nExpected: ");
1166            for(i=0; i<(targ-buffer); i++)
1167                log_info("%d, ", expectOffsets[i]);
1168            log_info("\nGot result:");
1169            for(i=0; i<(targ-buffer); i++)
1170                log_info("0x%04X,", buffer[i]);
1171            log_info("\nFrom Input:");
1172            for(i=0; i<(src-(const char *)source); i++)
1173                log_info("0x%02X,", (unsigned char)source[i]);
1174            log_info("\n");
1175        }
1176    }
1177    if(memcmp(buffer, expect, expectLen*2)){
1178        log_err("String does not match. from codePage %s TO Unicode\n", codepage);
1179        log_info("\nGot:");
1180        printUSeqErr(buffer, expectLen);
1181        log_info("\nExpected:");
1182        printUSeqErr(expect, expectLen);
1183        return FALSE;
1184    }
1185    else {
1186        log_verbose("Matches!\n");
1187    }
1188
1189    return TRUE;
1190}
1191
1192
1193static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
1194                const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset)
1195{
1196    UErrorCode status = U_ZERO_ERROR;
1197    UConverter *conv = 0;
1198    char    junkout[MAX_LENGTH]; /* FIX */
1199    int32_t    junokout[MAX_LENGTH]; /* FIX */
1200    char *p;
1201    const UChar *src;
1202    char *end;
1203    char *targ;
1204    int32_t *offs;
1205    int i;
1206    int32_t   realBufferSize;
1207    char *realBufferEnd;
1208    const UChar *realSourceEnd;
1209    const UChar *sourceLimit;
1210    UBool checkOffsets = TRUE;
1211    UBool doFlush;
1212
1213    UConverterFromUCallback oldAction = NULL;
1214    const void* oldContext = NULL;
1215
1216    for(i=0;i<MAX_LENGTH;i++)
1217        junkout[i] = (char)0xF0;
1218    for(i=0;i<MAX_LENGTH;i++)
1219        junokout[i] = 0xFF;
1220
1221    setNuConvTestName(codepage, "FROM");
1222
1223    log_verbose("\n=========  %s\n", gNuConvTestName);
1224
1225    conv = ucnv_open(codepage, &status);
1226    if(U_FAILURE(status))
1227    {
1228        log_data_err("Couldn't open converter %s\n",codepage);
1229        return TRUE;
1230    }
1231
1232    log_verbose("Converter opened..\n");
1233    /*----setting the callback routine----*/
1234    ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1235    if (U_FAILURE(status)) {
1236        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1237    }
1238    /*------------------------*/
1239
1240    src = source;
1241    targ = junkout;
1242    offs = junokout;
1243
1244    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
1245    realBufferEnd = junkout + realBufferSize;
1246    realSourceEnd = source + sourceLen;
1247
1248    if ( gOutBufferSize != realBufferSize )
1249      checkOffsets = FALSE;
1250
1251    if( gInBufferSize != MAX_LENGTH )
1252      checkOffsets = FALSE;
1253
1254    do
1255    {
1256        end = nct_min(targ + gOutBufferSize, realBufferEnd);
1257        sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
1258
1259        doFlush = (UBool)(sourceLimit == realSourceEnd);
1260
1261        if(targ == realBufferEnd)
1262          {
1263        log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
1264        return FALSE;
1265          }
1266        log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
1267
1268
1269        status = U_ZERO_ERROR;
1270        if(gInBufferSize ==999 && gOutBufferSize==999)
1271            doFlush = FALSE;
1272        ucnv_fromUnicode (conv,
1273                  (char **)&targ,
1274                  (const char *)end,
1275                  &src,
1276                  sourceLimit,
1277                  offs,
1278                  doFlush, /* flush if we're at the end of the input data */
1279                  &status);
1280        if(testReset)
1281            ucnv_resetToUnicode(conv);
1282        if(gInBufferSize ==999 && gOutBufferSize==999)
1283            ucnv_resetToUnicode(conv);
1284
1285      } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
1286
1287    if(U_FAILURE(status)) {
1288        log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1289        return FALSE;
1290      }
1291
1292    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1293        sourceLen, targ-junkout);
1294    if(getTestOption(VERBOSITY_OPTION))
1295    {
1296        char junk[999];
1297        char offset_str[999];
1298        char *ptr;
1299
1300        junk[0] = 0;
1301        offset_str[0] = 0;
1302        for(ptr = junkout;ptr<targ;ptr++)
1303        {
1304            sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr);
1305            sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]);
1306        }
1307
1308        log_verbose(junk);
1309        printSeq((const unsigned char *)expect, expectLen);
1310        if ( checkOffsets )
1311          {
1312            log_verbose("\nOffsets:");
1313            log_verbose(offset_str);
1314          }
1315        log_verbose("\n");
1316    }
1317    ucnv_close(conv);
1318
1319
1320    if(expectLen != targ-junkout)
1321    {
1322        log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1323        log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1324        log_info("\nGot:");
1325        printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
1326        log_info("\nExpected:");
1327        printSeqErr((const unsigned char*)expect, expectLen);
1328        return FALSE;
1329    }
1330
1331    if (checkOffsets && (expectOffsets != 0) )
1332    {
1333        log_verbose("comparing %d offsets..\n", targ-junkout);
1334        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
1335            log_err("did not get the expected offsets. %s", gNuConvTestName);
1336            log_err("Got  : ");
1337            printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
1338            for(p=junkout;p<targ;p++)
1339                log_err("%d, ", junokout[p-junkout]);
1340            log_err("\nExpected: ");
1341            for(i=0; i<(targ-junkout); i++)
1342                log_err("%d,", expectOffsets[i]);
1343        }
1344    }
1345
1346    log_verbose("comparing..\n");
1347    if(!memcmp(junkout, expect, expectLen))
1348    {
1349        log_verbose("Matches!\n");
1350        return TRUE;
1351    }
1352    else
1353    {
1354        log_err("String does not match. %s\n", gNuConvTestName);
1355        printUSeqErr(source, sourceLen);
1356        log_info("\nGot:");
1357        printSeqErr((const unsigned char *)junkout, expectLen);
1358        log_info("\nExpected:");
1359        printSeqErr((const unsigned char *)expect, expectLen);
1360
1361        return FALSE;
1362    }
1363}
1364
1365static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
1366               const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset)
1367{
1368    UErrorCode status = U_ZERO_ERROR;
1369    UConverter *conv = 0;
1370    UChar    junkout[MAX_LENGTH]; /* FIX */
1371    int32_t    junokout[MAX_LENGTH]; /* FIX */
1372    const char *src;
1373    const char *realSourceEnd;
1374    const char *srcLimit;
1375    UChar *p;
1376    UChar *targ;
1377    UChar *end;
1378    int32_t *offs;
1379    int i;
1380    UBool   checkOffsets = TRUE;
1381    int32_t   realBufferSize;
1382    UChar *realBufferEnd;
1383    UBool doFlush;
1384
1385    UConverterToUCallback oldAction = NULL;
1386    const void* oldContext = NULL;
1387
1388
1389    for(i=0;i<MAX_LENGTH;i++)
1390        junkout[i] = 0xFFFE;
1391
1392    for(i=0;i<MAX_LENGTH;i++)
1393        junokout[i] = -1;
1394
1395    setNuConvTestName(codepage, "TO");
1396
1397    log_verbose("\n=========  %s\n", gNuConvTestName);
1398
1399    conv = ucnv_open(codepage, &status);
1400    if(U_FAILURE(status))
1401    {
1402        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
1403        return TRUE;
1404    }
1405
1406    log_verbose("Converter opened..\n");
1407     /*----setting the callback routine----*/
1408    ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1409    if (U_FAILURE(status)) {
1410        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1411    }
1412    /*-------------------------------------*/
1413
1414    src = (const char *)source;
1415    targ = junkout;
1416    offs = junokout;
1417
1418    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
1419    realBufferEnd = junkout + realBufferSize;
1420    realSourceEnd = src + sourcelen;
1421
1422    if ( gOutBufferSize != realBufferSize )
1423      checkOffsets = FALSE;
1424
1425    if( gInBufferSize != MAX_LENGTH )
1426      checkOffsets = FALSE;
1427
1428    do
1429      {
1430        end = nct_min( targ + gOutBufferSize, realBufferEnd);
1431        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
1432
1433        if(targ == realBufferEnd)
1434        {
1435            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
1436            return FALSE;
1437        }
1438        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
1439
1440        /* oldTarg = targ; */
1441
1442        status = U_ZERO_ERROR;
1443        doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE);
1444
1445        ucnv_toUnicode (conv,
1446                &targ,
1447                end,
1448                (const char **)&src,
1449                (const char *)srcLimit,
1450                offs,
1451                doFlush, /* flush if we're at the end of hte source data */
1452                &status);
1453        if(testReset)
1454            ucnv_resetFromUnicode(conv);
1455        if(gInBufferSize ==999 && gOutBufferSize==999)
1456            ucnv_resetToUnicode(conv);
1457        /*        offs += (targ-oldTarg); */
1458
1459      } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
1460
1461    if(U_FAILURE(status))
1462    {
1463        log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1464        return FALSE;
1465    }
1466
1467    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
1468        sourcelen, targ-junkout);
1469    if(getTestOption(VERBOSITY_OPTION))
1470    {
1471        char junk[999];
1472        char offset_str[999];
1473
1474        UChar *ptr;
1475
1476        junk[0] = 0;
1477        offset_str[0] = 0;
1478
1479        for(ptr = junkout;ptr<targ;ptr++)
1480        {
1481            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
1482            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
1483        }
1484
1485        log_verbose(junk);
1486
1487        if ( checkOffsets )
1488          {
1489            log_verbose("\nOffsets:");
1490            log_verbose(offset_str);
1491          }
1492        log_verbose("\n");
1493    }
1494    ucnv_close(conv);
1495
1496    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
1497
1498    if (checkOffsets && (expectOffsets != 0))
1499    {
1500        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
1501
1502            log_err("did not get the expected offsets. %s",gNuConvTestName);
1503            for(p=junkout;p<targ;p++)
1504                log_err("%d, ", junokout[p-junkout]);
1505            log_err("\nExpected: ");
1506            for(i=0; i<(targ-junkout); i++)
1507                log_err("%d,", expectOffsets[i]);
1508            log_err("");
1509            for(i=0; i<(targ-junkout); i++)
1510                log_err("%X,", junkout[i]);
1511            log_err("");
1512            for(i=0; i<(src-(const char *)source); i++)
1513                log_err("%X,", (unsigned char)source[i]);
1514        }
1515    }
1516
1517    if(!memcmp(junkout, expect, expectlen*2))
1518    {
1519        log_verbose("Matches!\n");
1520        return TRUE;
1521    }
1522    else
1523    {
1524        log_err("String does not match. %s\n", gNuConvTestName);
1525        log_verbose("String does not match. %s\n", gNuConvTestName);
1526        log_info("\nGot:");
1527        printUSeq(junkout, expectlen);
1528        log_info("\nExpected:");
1529        printUSeq(expect, expectlen);
1530        return FALSE;
1531    }
1532}
1533
1534
1535static void TestResetBehaviour(void){
1536#if !UCONFIG_NO_LEGACY_CONVERSION
1537    log_verbose("Testing Reset for DBCS and MBCS\n");
1538    {
1539        static const UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
1540        static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
1541        static const int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
1542
1543
1544        static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8};
1545        static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7};
1546        static const int32_t offsets1[] =  { 0,2,4,6};
1547
1548        /*DBCS*/
1549        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1550                expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1551            log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1552        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1553                expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1554            log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1555
1556        if(!testConvertToU(expected1, sizeof(expected1),
1557                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1558                offsets1, TRUE))
1559           log_err("ibm-1363 -> did not match.\n");
1560        /*MBCS*/
1561        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1562                expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1563            log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1564        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1565                expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1566            log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1567
1568        if(!testConvertToU(expected1, sizeof(expected1),
1569                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1570                offsets1, TRUE))
1571           log_err("ibm-1363 -> did not match.\n");
1572
1573    }
1574
1575    log_verbose("Testing Reset for ISO-2022-jp\n");
1576    {
1577        static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1578
1579        static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1580                                    0x31,0x1A, 0x32};
1581
1582
1583        static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
1584
1585
1586        static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1587        static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1588                                    0x31,0x1A, 0x32};
1589        static const int32_t offsets1[] =  { 3,5,10,11,12};
1590
1591        // iso-2022-jp  android-change
1592        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1593                expected, sizeof(expected), "iso-2022-jp",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1594            log_err("u-> not match.\n");
1595        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1596                expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1597            log_err("u->  not match.\n");
1598
1599        if(!testConvertToU(expected1, sizeof(expected1),
1600                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1601                offsets1, TRUE))
1602           log_err("iso-2022-jp -> did not match.\n");
1603
1604    }
1605
1606    /* BEGIN android-removed */
1607    /* To save space, Android does not build full ISO-2022-CN tables.
1608       We skip the tests for ISO-2022-CN. */
1609    /*
1610    log_verbose("Testing Reset for ISO-2022-cn\n");
1611    {
1612        static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1613
1614        static const uint8_t expected[] = {
1615                                    0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1616                                    0x36, 0x21,
1617                                    0x0f, 0x31,
1618                                    0x1A,
1619                                    0x32
1620                                    };
1621
1622
1623        static const int32_t offsets[] = {
1624                                    0,    0,    0,    0,    0,    0,    0,
1625                                    1,    1,
1626                                    2,    2,
1627                                    3,
1628                                    5,  };
1629
1630        UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1631        static const uint8_t expected1[] = {
1632                                    0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1633                                    0x36, 0x21,
1634                                    0x1B, 0x24, 0x29, 0x47, 0x24, 0x22,
1635                                    0x0f, 0x1A,
1636                                    0x32
1637                                    };
1638        static const int32_t offsets1[] =  { 5,7,13,16,17};
1639
1640        // iso-2022-CN  android-change
1641        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1642                expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1643            log_err("u-> not match.\n");
1644        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1645                expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1646            log_err("u-> not match.\n");
1647
1648        if(!testConvertToU(expected1, sizeof(expected1),
1649                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1650                offsets1, TRUE))
1651           log_err("iso-2022-cn -> did not match.\n");
1652    }
1653    */
1654    /* END android-removed */
1655
1656        log_verbose("Testing Reset for ISO-2022-kr\n");
1657    {
1658        UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1659
1660        static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
1661                                    0x0E, 0x6C, 0x69,
1662                                    0x0f, 0x1A,
1663                                    0x0e, 0x6F, 0x4B,
1664                                    0x0F, 0x31,
1665                                    0x1A,
1666                                    0x32 };
1667
1668        static const int32_t offsets[] = {-1, -1, -1, -1,
1669                              0, 0, 0,
1670                              1, 1,
1671                              3, 3, 3,
1672                              4, 4,
1673                              5,
1674                              7,
1675                            };
1676        static const UChar    sampleText1[] =   { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032};
1677
1678        static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43,
1679                                    0x0E, 0x6C, 0x69,
1680                                    0x0f, 0x41,
1681                                    0x0e, 0x6F, 0x4B,
1682                                    0x0F, 0x31,
1683                                    0x42,
1684                                    0x32 };
1685
1686        static const int32_t offsets1[] = {
1687                              5, 8, 10,
1688                              13, 14, 15
1689
1690                            };
1691        // iso-2022-kr  android-change
1692        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1693                expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1694            log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1695        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1696                expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1697            log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1698        if(!testConvertToU(expected1, sizeof(expected1),
1699                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1700                offsets1, TRUE))
1701           log_err("iso-2022-kr -> did not match.\n");
1702    }
1703
1704        log_verbose("Testing Reset for HZ\n");
1705    {
1706        static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1707
1708        static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
1709                                    0x7E, 0x7D, 0x1A,
1710                                    0x7E, 0x7B, 0x36, 0x21,
1711                                    0x7E, 0x7D, 0x31,
1712                                    0x1A,
1713                                    0x32 };
1714
1715
1716        static const int32_t offsets[] = {0,0,0,0,
1717                             1,1,1,
1718                             3,3,3,3,
1719                             4,4,4,
1720                             5,
1721                             7,};
1722        static const UChar    sampleText1[] =   { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032};
1723
1724        static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B,
1725                                    0x7E, 0x7D, 0x35,
1726                                    0x7E, 0x7B, 0x36, 0x21,
1727                                    0x7E, 0x7D, 0x31,
1728                                    0x41,
1729                                    0x32 };
1730
1731
1732        static const int32_t offsets1[] = {2,6,9,13,14,15
1733                            };
1734
1735        /*hz*/
1736        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1737                expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1738            log_err("u->  not match.\n");
1739        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1740                expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1741            log_err("u->  not match.\n");
1742        if(!testConvertToU(expected1, sizeof(expected1),
1743                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1744                offsets1, TRUE))
1745           log_err("hz -> did not match.\n");
1746    }
1747#endif
1748
1749    /*UTF-8*/
1750     log_verbose("Testing for UTF8\n");
1751    {
1752        static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
1753        int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
1754                           0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
1755                           0x04, 0x06 };
1756        static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
1757            0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
1758
1759
1760        static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
1761        /*UTF-8*/
1762        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1763            expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1764            log_err("u-> UTF8 with offsets and flush true did not match.\n");
1765        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1766            expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1767            log_err("u-> UTF8 with offsets and flush true did not match.\n");
1768        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1769            expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1770            log_err("u-> UTF8 with offsets and flush true did not match.\n");
1771        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1772            expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1773            log_err("u-> UTF8 with offsets and flush true did not match.\n");
1774        if(!testConvertToU(expected, sizeof(expected),
1775            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1776            log_err("UTF8 -> did not match.\n");
1777        if(!testConvertToU(expected, sizeof(expected),
1778            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1779            log_err("UTF8 -> did not match.\n");
1780        if(!testConvertToU(expected, sizeof(expected),
1781            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
1782            log_err("UTF8 -> did not match.\n");
1783        if(!testConvertToU(expected, sizeof(expected),
1784            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
1785            log_err("UTF8 -> did not match.\n");
1786
1787    }
1788
1789}
1790
1791/* Test that U_TRUNCATED_CHAR_FOUND is set. */
1792static void
1793doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
1794    UConverter *cnv;
1795
1796    UChar buffer[2];
1797    UChar *target, *targetLimit;
1798    const char *source, *sourceLimit;
1799
1800    UErrorCode errorCode;
1801
1802    errorCode=U_ZERO_ERROR;
1803    cnv=ucnv_open(cnvName, &errorCode);
1804    if(U_FAILURE(errorCode)) {
1805        log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode));
1806        return;
1807    }
1808    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
1809    if(U_FAILURE(errorCode)) {
1810        log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n",
1811                    cnvName, u_errorName(errorCode));
1812        ucnv_close(cnv);
1813        return;
1814    }
1815
1816    source=(const char *)bytes;
1817    sourceLimit=source+length;
1818    target=buffer;
1819    targetLimit=buffer+LENGTHOF(buffer);
1820
1821    /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */
1822    ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode);
1823    if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) {
1824        log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n",
1825                cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1826    }
1827
1828    errorCode=U_ZERO_ERROR;
1829    source=sourceLimit;
1830    target=buffer;
1831    ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
1832    if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
1833        log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n",
1834                cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer));
1835    }
1836
1837    /* 2. input bytes with flush=TRUE */
1838    ucnv_resetToUnicode(cnv);
1839
1840    errorCode=U_ZERO_ERROR;
1841    source=(const char *)bytes;
1842    target=buffer;
1843    ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
1844    if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) {
1845        log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n",
1846                cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1847    }
1848
1849
1850    ucnv_close(cnv);
1851}
1852
/*
 * Feeds a table of incomplete byte sequences, one converter per entry, to
 * doTestTruncated().
 */
static void
TestTruncated() {
    typedef struct {
        const char *cnvName;
        uint8_t bytes[8]; /* partial input bytes resulting in no output */
        int32_t length;
    } TruncatedCase;

    static const TruncatedCase cases[]={
        { "IMAP-mailbox-name",  { 0x26 }, 1 }, /* & */
        { "IMAP-mailbox-name",  { 0x26, 0x42 }, 2 }, /* &B */
        { "IMAP-mailbox-name",  { 0x26, 0x42, 0x42 }, 3 }, /* &BB */
        { "IMAP-mailbox-name",  { 0x26, 0x41, 0x41 }, 3 }, /* &AA */

        { "UTF-7",      { 0x2b, 0x42 }, 2 }, /* +B */
        { "UTF-8",      { 0xd1 }, 1 },

        { "UTF-16BE",   { 0x4e }, 1 },
        { "UTF-16LE",   { 0x4e }, 1 },
        { "UTF-16",     { 0x4e }, 1 },
        { "UTF-16",     { 0xff }, 1 },
        { "UTF-16",     { 0xfe, 0xff, 0x4e }, 3 },

        { "UTF-32BE",   { 0, 0, 0x4e }, 3 },
        { "UTF-32LE",   { 0x4e }, 1 },
        { "UTF-32",     { 0, 0, 0x4e }, 3 },
        { "UTF-32",     { 0xff }, 1 },
        { "UTF-32",     { 0, 0, 0xfe, 0xff, 0 }, 5 },
        { "SCSU",       { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */

        { "BOCU-1",     { 0xd5 }, 1 },

#if !UCONFIG_NO_LEGACY_CONVERSION
        /* these two entries are only present when legacy conversion is built */
        { "Shift-JIS",  { 0xe0 }, 1 },

        { "ibm-939",    { 0x0e, 0x41 }, 2 }, /* SO 0x41 */
#endif
    };
    const TruncatedCase *const casesEnd = cases + LENGTHOF(cases);
    const TruncatedCase *current;

    for(current = cases; current < casesEnd; ++current) {
        doTestTruncated(current->cnvName, current->bytes, current->length);
    }
}
1897
1898typedef struct NameRange {
1899    const char *name;
1900    UChar32 start, end, start2, end2, notStart, notEnd;
1901} NameRange;
1902
1903static void
1904TestUnicodeSet() {
1905    UErrorCode errorCode;
1906    UConverter *cnv;
1907    USet *set;
1908    const char *name;
1909    int32_t i, count;
1910
1911    static const char *const completeSetNames[]={
1912        "UTF-7",
1913        "UTF-8",
1914        "UTF-16",
1915        "UTF-16BE",
1916        "UTF-16LE",
1917        "UTF-32",
1918        "UTF-32BE",
1919        "UTF-32LE",
1920        "SCSU",
1921        "BOCU-1",
1922        "CESU-8",
1923#if !UCONFIG_NO_LEGACY_CONVERSION
1924        "gb18030",
1925#endif
1926        "IMAP-mailbox-name"
1927    };
1928#if !UCONFIG_NO_LEGACY_CONVERSION
1929    static const char *const lmbcsNames[]={
1930        "LMBCS-1",
1931        "LMBCS-2",
1932        "LMBCS-3",
1933        "LMBCS-4",
1934        "LMBCS-5",
1935        "LMBCS-6",
1936        "LMBCS-8",
1937        "LMBCS-11",
1938        "LMBCS-16",
1939        "LMBCS-17",
1940        "LMBCS-18",
1941        "LMBCS-19"
1942    };
1943#endif
1944
1945    static const NameRange nameRanges[]={
1946        { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1947#if !UCONFIG_NO_LEGACY_CONVERSION
1948        { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1949#endif
1950        { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
1951#if !UCONFIG_NO_LEGACY_CONVERSION
1952        { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
1953        { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
1954        /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
1955        { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
1956#else
1957        { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
1958#endif
1959    };
1960
1961    /* open an empty set */
1962    set=uset_open(1, 0);
1963
1964    count=ucnv_countAvailable();
1965    for(i=0; i<count; ++i) {
1966        errorCode=U_ZERO_ERROR;
1967        name=ucnv_getAvailableName(i);
1968        cnv=ucnv_open(name, &errorCode);
1969        if(U_FAILURE(errorCode)) {
1970            log_data_err("error: unable to open converter %s - %s\n",
1971                    name, u_errorName(errorCode));
1972            continue;
1973        }
1974
1975        uset_clear(set);
1976        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
1977        if(U_FAILURE(errorCode)) {
1978            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
1979                    name, u_errorName(errorCode));
1980        } else if(uset_size(set)==0) {
1981            log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
1982        }
1983
1984        ucnv_close(cnv);
1985    }
1986
1987    /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
1988    for(i=0; i<LENGTHOF(completeSetNames); ++i) {
1989        errorCode=U_ZERO_ERROR;
1990        name=completeSetNames[i];
1991        cnv=ucnv_open(name, &errorCode);
1992        if(U_FAILURE(errorCode)) {
1993            log_data_err("error: unable to open converter %s - %s\n",
1994                    name, u_errorName(errorCode));
1995            continue;
1996        }
1997
1998        uset_clear(set);
1999        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2000        if(U_FAILURE(errorCode)) {
2001            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2002                    name, u_errorName(errorCode));
2003        } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
2004            log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
2005        }
2006
2007        ucnv_close(cnv);
2008    }
2009
2010#if !UCONFIG_NO_LEGACY_CONVERSION
2011    /* test LMBCS variants which convert all of Unicode except for U+F6xx */
2012    for(i=0; i<LENGTHOF(lmbcsNames); ++i) {
2013        errorCode=U_ZERO_ERROR;
2014        name=lmbcsNames[i];
2015        cnv=ucnv_open(name, &errorCode);
2016        if(U_FAILURE(errorCode)) {
2017            log_data_err("error: unable to open converter %s - %s\n",
2018                    name, u_errorName(errorCode));
2019            continue;
2020        }
2021
2022        uset_clear(set);
2023        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2024        if(U_FAILURE(errorCode)) {
2025            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2026                    name, u_errorName(errorCode));
2027        } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) {
2028            log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name);
2029        }
2030
2031        ucnv_close(cnv);
2032    }
2033#endif
2034
2035    /* test specific sets */
2036    for(i=0; i<LENGTHOF(nameRanges); ++i) {
2037        errorCode=U_ZERO_ERROR;
2038        name=nameRanges[i].name;
2039        cnv=ucnv_open(name, &errorCode);
2040        if(U_FAILURE(errorCode)) {
2041            log_data_err("error: unable to open converter %s - %s\n",
2042                         name, u_errorName(errorCode));
2043            continue;
2044        }
2045
2046        uset_clear(set);
2047        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2048        if(U_FAILURE(errorCode)) {
2049            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2050                    name, u_errorName(errorCode));
2051        } else if(
2052            !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
2053            (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
2054        ) {
2055            log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
2056        } else if(nameRanges[i].notStart>=0) {
2057            /* simulate containsAny() with the C API */
2058            uset_complement(set);
2059            if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
2060                log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
2061            }
2062        }
2063
2064        ucnv_close(cnv);
2065    }
2066
2067    errorCode = U_ZERO_ERROR;
2068    ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode);
2069    if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2070        log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
2071    }
2072    errorCode = U_PARSE_ERROR;
2073    /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */
2074    ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode);
2075    if (errorCode != U_PARSE_ERROR) {
2076        log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
2077    }
2078
2079    uset_close(set);
2080}
2081