1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*****************************************************************************
7*
8* File CCONVTST.C
9*
10* Modification History:
11*        Name                     Description
12*   Madhu Katragadda              7/7/2000        Converter Tests for extended code coverage
13******************************************************************************
14*/
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include "unicode/uloc.h"
19#include "unicode/ucnv.h"
20#include "unicode/utypes.h"
21#include "unicode/ustring.h"
22#include "unicode/uset.h"
23#include "cintltst.h"
24
/* NOTE(review): the users of MAX_LENGTH are outside the visible part of
 * this file; presumably a working-buffer capacity - confirm. */
#define MAX_LENGTH 999

/* Highest Unicode code point, and the first/last values of the UTF-16
 * surrogate range. */
#define UNICODE_LIMIT 0x10FFFF
#define SURROGATE_HIGH_START    0xD800
#define SURROGATE_LOW_END       0xDFFF

/* Buffer sizes reported in the banner built by setNuConvTestName(). */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Banner text written by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/* Smaller of x and y.  Every use of an argument is parenthesized so that
 * operands such as "a|b" compare as a whole; each argument may still be
 * evaluated twice, so do not pass expressions with side effects. */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))
/* Element count of a real array (not a pointer), as an int32_t. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
37
38static void printSeq(const unsigned char* a, int len);
39static void printSeqErr(const unsigned char* a, int len);
40static void printUSeq(const UChar* a, int len);
41static void printUSeqErr(const UChar* a, int len);
42static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
43                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
44static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
45               const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
46
47static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
48                const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset);
49static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
50               const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset);
51
52static void setNuConvTestName(const char *codepage, const char *direction)
53{
54    sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
55        codepage,
56        direction,
57        (int)gInBufferSize,
58        (int)gOutBufferSize);
59}
60
61
/* Test entry points registered by addExtraTests(). */
static void TestSurrogateBehaviour(void);
static void TestErrorBehaviour(void);

#if !UCONFIG_NO_LEGACY_CONVERSION
/* Only built when legacy (non-Unicode) converters are available. */
static void TestToUnicodeErrorBehaviour(void);
static void TestGetNextErrorBehaviour(void);
#endif

static void TestRegressionUTF8(void);
static void TestRegressionUTF32(void);
static void TestAvailableConverters(void);
/* for improved code coverage in ucnv_cnv.c */
static void TestFlushInternalBuffer(void);
static void TestResetBehaviour(void);
static void TestTruncated(void);
static void TestUnicodeSet(void);

/* Not registered directly by addExtraTests(). */
static void TestWithBufferSize(int32_t osize, int32_t isize);
79
80
/*
 * Verbose-log a byte sequence as "{0xNN 0xNN ...}" on its own line.
 * a   - bytes to dump
 * len - number of bytes; nothing is printed between the braces when len <= 0
 */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X ", a[idx]);
    }
    log_verbose("}\n");
}
89
90static void printUSeq(const UChar* a, int len)
91{
92    int i=0;
93    log_verbose("\n{");
94    while (i<len)
95        log_verbose("%0x04X ", a[i++]);
96    log_verbose("}\n");
97}
98
/*
 * Write a byte sequence to stderr as "{0xNN 0xNN ...}" on its own line.
 * a   - bytes to dump
 * len - number of bytes; nothing is printed between the braces when len <= 0
 */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "\n{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02X ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
106
107static void printUSeqErr(const UChar* a, int len)
108{
109    int i=0;
110    fprintf(stderr, "\n{");
111    while (i<len)
112        fprintf(stderr, "0x%04X ", a[i++]);
113    fprintf(stderr,"}\n");
114}
115
116void addExtraTests(TestNode** root);
117
118void addExtraTests(TestNode** root)
119{
120     addTest(root, &TestSurrogateBehaviour,         "tsconv/ncnvtst/TestSurrogateBehaviour");
121     addTest(root, &TestErrorBehaviour,             "tsconv/ncnvtst/TestErrorBehaviour");
122
123#if !UCONFIG_NO_LEGACY_CONVERSION
124     addTest(root, &TestToUnicodeErrorBehaviour,    "tsconv/ncnvtst/ToUnicodeErrorBehaviour");
125     addTest(root, &TestGetNextErrorBehaviour,      "tsconv/ncnvtst/TestGetNextErrorBehaviour");
126#endif
127
128     addTest(root, &TestAvailableConverters,        "tsconv/ncnvtst/TestAvailableConverters");
129     addTest(root, &TestFlushInternalBuffer,        "tsconv/ncnvtst/TestFlushInternalBuffer");
130     addTest(root, &TestResetBehaviour,             "tsconv/ncnvtst/TestResetBehaviour");
131     addTest(root, &TestRegressionUTF8,             "tsconv/ncnvtst/TestRegressionUTF8");
132     addTest(root, &TestRegressionUTF32,            "tsconv/ncnvtst/TestRegressionUTF32");
133     addTest(root, &TestTruncated,                  "tsconv/ncnvtst/TestTruncated");
134     addTest(root, &TestUnicodeSet,                 "tsconv/ncnvtst/TestUnicodeSet");
135}
136
137/*test surrogate behaviour*/
138static void TestSurrogateBehaviour(){
139    log_verbose("Testing for SBCS and LATIN_1\n");
140    {
141        UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032};
142        const uint8_t expected[] = {0x31, 0x1a, 0x32};
143
144#if !UCONFIG_NO_LEGACY_CONVERSION
145        /*SBCS*/
146        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
147                expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR))
148            log_err("u-> ibm-920 [UCNV_SBCS] not match.\n");
149#endif
150
151        /*LATIN_1*/
152        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
153                expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR ))
154            log_err("u-> LATIN_1 not match.\n");
155
156    }
157
158#if !UCONFIG_NO_LEGACY_CONVERSION
159    log_verbose("Testing for DBCS and MBCS\n");
160    {
161        UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
162        const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
163        int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
164
165        /*DBCS*/
166        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
167                expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
168            log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
169        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
170                expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR))
171            log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
172        /*MBCS*/
173        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
174                expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
175            log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
176        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
177                expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR))
178            log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
179    }
180
181    log_verbose("Testing for ISO-2022-jp\n");
182    {
183        UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
184
185        const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
186                                    0x31,0x1A, 0x32};
187
188
189        int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
190
191        /*iso-2022-jp*/
192        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
193                expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR))
194            log_err("u-> not match.\n");
195        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
196                expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR))
197            log_err("u->  not match.\n");
198    }
199
200    log_verbose("Testing for ISO-2022-cn\n");
201    {
202        static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
203
204        static const uint8_t expected[] = {
205                                    0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
206                                    0x36, 0x21,
207                                    0x0F, 0x31,
208                                    0x1A,
209                                    0x32
210                                    };
211
212
213
214        static const int32_t offsets[] = {
215                                    0,    0,    0,    0,    0,    0,    0,
216                                    1,    1,
217                                    2,    2,
218                                    3,
219                                    5,  };
220
221        /*iso-2022-CN*/
222        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
223                expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR))
224            log_err("u-> not match.\n");
225        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
226                expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR))
227            log_err("u-> not match.\n");
228    }
229
230        log_verbose("Testing for ISO-2022-kr\n");
231    {
232        static const UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
233
234        static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
235                                    0x0E, 0x6C, 0x69,
236                                    0x0f, 0x1A,
237                                    0x0e, 0x6F, 0x4B,
238                                    0x0F, 0x31,
239                                    0x1A,
240                                    0x32 };
241
242        static const int32_t offsets[] = {-1, -1, -1, -1,
243                              0, 0, 0,
244                              1, 1,
245                              3, 3, 3,
246                              4, 4,
247                              5,
248                              7,
249                            };
250
251        /*iso-2022-kr*/
252        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
253                expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR))
254            log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
255        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
256                expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR))
257            log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
258    }
259
260        log_verbose("Testing for HZ\n");
261    {
262        static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
263
264        static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
265                                    0x7E, 0x7D, 0x1A,
266                                    0x7E, 0x7B, 0x36, 0x21,
267                                    0x7E, 0x7D, 0x31,
268                                    0x1A,
269                                    0x32 };
270
271
272        static const int32_t offsets[] = {0,0,0,0,
273                             1,1,1,
274                             3,3,3,3,
275                             4,4,4,
276                             5,
277                             7,};
278
279        /*hz*/
280        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
281                expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR))
282            log_err("u-> HZ not match.\n");
283        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
284                expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR))
285            log_err("u-> HZ not match.\n");
286    }
287#endif
288
289    /*UTF-8*/
290     log_verbose("Testing for UTF8\n");
291    {
292        static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
293        static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
294                           0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
295                           0x04, 0x06 };
296        static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
297            0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
298
299
300        static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
301        /*UTF-8*/
302        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
303            expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR ))
304            log_err("u-> UTF8 with offsets and flush true did not match.\n");
305        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
306            expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR ))
307            log_err("u-> UTF8 with offsets and flush true did not match.\n");
308        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
309            expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR ))
310            log_err("u-> UTF8 with offsets and flush true did not match.\n");
311        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
312            expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR ))
313            log_err("u-> UTF8 with offsets and flush true did not match.\n");
314
315        if(!convertToU(expected, sizeof(expected),
316            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR ))
317            log_err("UTF8 -> u did not match.\n");
318        if(!convertToU(expected, sizeof(expected),
319            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR ))
320            log_err("UTF8 -> u did not match.\n");
321        if(!convertToU(expected, sizeof(expected),
322            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR ))
323            log_err("UTF8 ->u  did not match.\n");
324        if(!convertToU(expected, sizeof(expected),
325            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR ))
326            log_err("UTF8 -> u did not match.\n");
327
328    }
329}
330
331/*test various error behaviours*/
332static void TestErrorBehaviour(){
333    log_verbose("Testing for SBCS and LATIN_1\n");
334    {
335        static const UChar    sampleText[] =   { 0x0031, 0xd801};
336        static const UChar    sampleText2[] =   { 0x0031, 0xd801, 0x0032};
337        static const uint8_t expected0[] =          { 0x31};
338        static const uint8_t expected[] =          { 0x31, 0x1a};
339        static const uint8_t expected2[] =         { 0x31, 0x1a, 0x32};
340
341#if !UCONFIG_NO_LEGACY_CONVERSION
342        /*SBCS*/
343        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
344                expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR))
345            log_err("u-> ibm-920 [UCNV_SBCS] \n");
346        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
347                expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR))
348            log_err("u-> ibm-920 [UCNV_SBCS] \n");
349        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
350                expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR))
351            log_err("u-> ibm-920 [UCNV_SBCS] did not match\n");
352#endif
353
354        /*LATIN_1*/
355        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
356                expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
357            log_err("u-> LATIN_1 is supposed to fail\n");
358        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
359                expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR))
360            log_err("u-> LATIN_1 is supposed to fail\n");
361
362        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
363                expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
364            log_err("u-> LATIN_1 did not match\n");
365    }
366
367#if !UCONFIG_NO_LEGACY_CONVERSION
368    log_verbose("Testing for DBCS and MBCS\n");
369    {
370        static const UChar    sampleText[]    = { 0x00a1, 0xd801};
371        static const uint8_t expected[] = { 0xa2, 0xae};
372        static const int32_t offsets[]        = { 0x00, 0x00};
373        static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0};
374        static const int32_t offsetsSUB[]        = { 0x00, 0x00, 0x01, 0x01};
375
376        static const UChar       sampleText2[] = { 0x00a1, 0xd801, 0x00a4};
377        static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
378        static const int32_t offsets2[]        = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02};
379
380        static const UChar       sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01};
381        static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0};
382        static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x01, 0x02, 0x02};
383
384        static const UChar       sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01};
385        static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe};
386        static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 };
387
388        /*DBCS*/
389        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
390                expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
391            log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
392        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
393                expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
394            log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
395
396        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
397                expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR))
398            log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
399        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
400                expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_ZERO_ERROR))
401            log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
402
403
404        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
405                expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
406            log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
407        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
408                expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR))
409            log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
410
411        /*MBCS*/
412        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
413                expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
414            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
415        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
416                expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
417            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
418
419        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
420                expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
421            log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
422        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
423                expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
424            log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
425        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
426                expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR))
427            log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
428
429        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
430                expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR))
431            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
432        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
433                expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR))
434            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
435
436        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
437                expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
438            log_err("u-> euc-jp [UCNV_MBCS] \n");
439        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
440                expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
441            log_err("u-> euc-jp [UCNV_MBCS] \n");
442    }
443
444    /*iso-2022-jp*/
445    log_verbose("Testing for iso-2022-jp\n");
446    {
447        static const UChar    sampleText[]    = { 0x0031, 0xd801};
448        static const uint8_t expected[] = {  0x31};
449        static const uint8_t expectedSUB[] = {  0x31, 0x1a};
450        static const int32_t offsets[]        = { 0x00, 1};
451
452        static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
453        static const uint8_t expected2[] = {  0x31,0x1A,0x32};
454        static const int32_t offsets2[]        = { 0x00,0x01,0x02};
455
456        static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
457        static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a};
458        static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 };
459        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
460                expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR))
461            log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
462        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
463                expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_ZERO_ERROR))
464            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
465
466        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
467                expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR))
468            log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n");
469        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
470                expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
471            log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
472        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
473                expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
474            log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
475
476        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
477                expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
478            log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
479        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
480                expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
481            log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
482    }
483
484    /*iso-2022-cn*/
485    log_verbose("Testing for iso-2022-cn\n");
486    {
487        static const UChar    sampleText[]    = { 0x0031, 0xd801};
488        static const uint8_t expected[] = { 0x31};
489        static const uint8_t expectedSUB[] = { 0x31, 0x1A};
490        static const int32_t offsets[]        = { 0x00, 1};
491
492        static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
493        static const uint8_t expected2[] = { 0x31, 0x1A,0x32};
494        static const int32_t offsets2[]        = { 0x00, 0x01,0x02};
495
496        static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
497        static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A};
498        static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x02 };
499
500        static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
501        static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a };
502        static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 };
503        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
504                expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR))
505            log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
506        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
507                expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR))
508            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
509
510        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
511                expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR))
512            log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n");
513        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
514                expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
515            log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
516        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
517                expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
518            log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
519
520        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
521                expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR))
522            log_err("u->iso-2022-cn [UCNV_MBCS] \n");
523        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
524                expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR))
525            log_err("u-> iso-2022-cn[UCNV_MBCS] \n");
526
527        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
528                expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR))
529            log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
530        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
531                expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR))
532            log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
533    }
534
535    /*iso-2022-kr*/
536    log_verbose("Testing for iso-2022-kr\n");
537    {
538        static const UChar    sampleText[]    = { 0x0031, 0xd801};
539        static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31};
540        static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A};
541        static const int32_t offsets[]        = { -1,   -1,   -1,   -1,   0x00, 1};
542
543        static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
544        static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32};
545        static const int32_t offsets2[]        = { -1,   -1,   -1,   -1,   0x00, 0x01, 0x02};
546
547        static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
548        static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43,  0x51, 0x50, 0x1A };
549        static const int32_t offsets3MBCS[]        = { -1,   -1,   -1,   -1,    0x00, 0x01, 0x02, 0x02 };
550
551        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
552                expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR))
553            log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
554        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
555                expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR))
556            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
557
558        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
559                expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR))
560            log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n");
561        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
562                expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
563            log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
564        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
565                expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
566            log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
567
568        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
569                expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR))
570            log_err("u->iso-2022-kr [UCNV_MBCS] \n");
571        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
572                expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR))
573            log_err("u-> iso-2022-kr[UCNV_MBCS] \n");
574    }
575
576    /*HZ*/
577    log_verbose("Testing for HZ\n");
578    {
579        static const UChar    sampleText[]    = { 0x0031, 0xd801};
580        static const uint8_t expected[] = { 0x7e, 0x7d, 0x31};
581        static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A};
582        static const int32_t offsets[]        = { 0x00, 0x00, 0x00, 1};
583
584        static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
585        static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31,  0x1A,  0x32 };
586        static const int32_t offsets2[]        = { 0x00, 0x00, 0x00, 0x01,  0x02 };
587
588        static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
589        static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50,  0x1A };
590        static const int32_t offsets3MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x02};
591
592        static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
593        static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a };
594        static const int32_t offsets4MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 };
595        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
596                expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR))
597            log_err("u-> HZ [UCNV_MBCS] \n");
598        if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
599                expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR))
600            log_err("u-> ibm-1363 [UCNV_MBCS] \n");
601
602        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
603                expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR))
604            log_err("u->HZ[UCNV_DBCS] did not match\n");
605        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
606                expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
607            log_err("u-> HZ [UCNV_DBCS] did not match\n");
608        if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
609                expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
610            log_err("u-> HZ [UCNV_DBCS] did not match\n");
611
612        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
613                expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR))
614            log_err("u->HZ [UCNV_MBCS] \n");
615        if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
616                expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR))
617            log_err("u-> HZ[UCNV_MBCS] \n");
618
619        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
620                expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR))
621            log_err("u-> HZ [UCNV_MBCS] \n");
622        if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
623                expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR))
624            log_err("u-> HZ [UCNV_MBCS] \n");
625    }
626#endif
627}
628
629#if !UCONFIG_NO_LEGACY_CONVERSION
630/*test different convertToUnicode error behaviours*/
631static void TestToUnicodeErrorBehaviour()
632{
633    log_verbose("Testing error conditions for DBCS\n");
634    {
635        uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04};
636        const UChar expected[] = { 0x00a1 };
637
638        if(!convertToU(sampleText, sizeof(sampleText),
639                expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_ZERO_ERROR ))
640            log_err("DBCS (ibm-1363)->Unicode  did not match.\n");
641        if(!convertToU(sampleText, sizeof(sampleText),
642                expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_ZERO_ERROR ))
643            log_err("DBCS (ibm-1363)->Unicode  with flush = false did not match.\n");
644    }
645    log_verbose("Testing error conditions for SBCS\n");
646    {
647        uint8_t sampleText[] = { 0xa2, 0xFF};
648        const UChar expected[] = { 0x00c2 };
649
650      /*  uint8_t sampleText2[] = { 0xa2, 0x70 };
651        const UChar expected2[] = { 0x0073 };*/
652
653        if(!convertToU(sampleText, sizeof(sampleText),
654                expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR ))
655            log_err("SBCS (ibm-1051)->Unicode  did not match.\n");
656        if(!convertToU(sampleText, sizeof(sampleText),
657                expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR ))
658            log_err("SBCS (ibm-1051)->Unicode  with flush = false did not match.\n");
659
660    }
661}
662
663static void TestGetNextErrorBehaviour(){
664   /*Test for unassigned character*/
665#define INPUT_SIZE 1
666    static const char input1[INPUT_SIZE]={ 0x70 };
667    const char* source=(const char*)input1;
668    UErrorCode err=U_ZERO_ERROR;
669    UChar32 c=0;
670    UConverter *cnv=ucnv_open("ibm-424", &err);
671    if(U_FAILURE(err)) {
672        log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err));
673        return;
674    }
675    c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err);
676    if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
677        log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n",  myErrorName(err), c);
678    }
679    ucnv_close(cnv);
680}
681#endif
682
683#define MAX_UTF16_LEN 2
684#define MAX_UTF8_LEN 4
685
686/*Regression test for utf8 converter*/
687static void TestRegressionUTF8(){
688    UChar32 currCh = 0;
689    int32_t offset8;
690    int32_t offset16;
691    UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
692    uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH);
693
694    while (currCh <= UNICODE_LIMIT) {
695        offset16 = 0;
696        offset8 = 0;
697        while(currCh <= UNICODE_LIMIT
698            && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
699            && offset8 < (MAX_LENGTH - MAX_UTF8_LEN))
700        {
701            if (currCh == SURROGATE_HIGH_START) {
702                currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
703            }
704            UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
705            UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh);
706            currCh++;
707        }
708        if(!convertFromU(standardForm, offset16,
709            utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
710            log_err("Unicode->UTF8 did not match.\n");
711        }
712        if(!convertToU(utf8, offset8,
713            standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
714            log_err("UTF8->Unicode did not match.\n");
715        }
716    }
717
718    free(standardForm);
719    free(utf8);
720
721    {
722        static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 };
723        static const UChar expected[] = { 0x0301, 0x0300 };
724        UConverter *conv8;
725        UErrorCode err = U_ZERO_ERROR;
726        UChar pivotBuffer[100];
727        const UChar* const pivEnd = pivotBuffer + 100;
728        const char* srcBeg;
729        const char* srcEnd;
730        UChar* pivBeg;
731
732        conv8 = ucnv_open("UTF-8", &err);
733
734        srcBeg = src8;
735        pivBeg = pivotBuffer;
736        srcEnd = src8 + 3;
737        ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
738        if (srcBeg != srcEnd) {
739            log_err("Did not consume whole buffer on first call.\n");
740        }
741
742        srcEnd = src8 + 4;
743        ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
744        if (srcBeg != srcEnd) {
745            log_err("Did not consume whole buffer on second call.\n");
746        }
747
748        if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
749            log_err("Did not get expected results for UTF-8.\n");
750        }
751        ucnv_close(conv8);
752    }
753}
754
755#define MAX_UTF32_LEN 1
756
757static void TestRegressionUTF32(){
758    UChar32 currCh = 0;
759    int32_t offset32;
760    int32_t offset16;
761    UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
762    UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32));
763
764    while (currCh <= UNICODE_LIMIT) {
765        offset16 = 0;
766        offset32 = 0;
767        while(currCh <= UNICODE_LIMIT
768            && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
769            && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN))
770        {
771            if (currCh == SURROGATE_HIGH_START) {
772                currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
773            }
774            UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
775            UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh);
776            currCh++;
777        }
778        if(!convertFromU(standardForm, offset16,
779            (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
780            log_err("Unicode->UTF32 did not match.\n");
781        }
782        if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32),
783            standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
784            log_err("UTF32->Unicode did not match.\n");
785        }
786    }
787    free(standardForm);
788    free(utf32);
789
790    {
791        /* Check for lone surrogate error handling. */
792        static const UChar   sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 };
793        static const UChar   sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 };
794        static const uint8_t expectedUTF32BE[] = {
795            0x00, 0x00, 0x00, 0x31,
796            0x00, 0x00, 0xff, 0xfd,
797            0x00, 0x00, 0x00, 0x32
798        };
799        static const uint8_t expectedUTF32LE[] = {
800            0x31, 0x00, 0x00, 0x00,
801            0xfd, 0xff, 0x00, 0x00,
802            0x32, 0x00, 0x00, 0x00
803        };
804        static const int32_t offsetsUTF32[] = {
805            0x00, 0x00, 0x00, 0x00,
806            0x01, 0x01, 0x01, 0x01,
807            0x02, 0x02, 0x02, 0x02
808        };
809
810        if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]),
811                expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
812            log_err("u->UTF-32BE\n");
813        if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]),
814                expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
815            log_err("u->UTF-32BE\n");
816
817        if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]),
818                expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
819            log_err("u->UTF-32LE\n");
820        if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]),
821                expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
822            log_err("u->UTF-32LE\n");
823    }
824
825    {
826        static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 };
827        static const UChar expected[] = { 0x0031, 0x0030 };
828        UConverter *convBE;
829        UErrorCode err = U_ZERO_ERROR;
830        UChar pivotBuffer[100];
831        const UChar* const pivEnd = pivotBuffer + 100;
832        const char* srcBeg;
833        const char* srcEnd;
834        UChar* pivBeg;
835
836        convBE = ucnv_open("UTF-32BE", &err);
837
838        srcBeg = srcBE;
839        pivBeg = pivotBuffer;
840        srcEnd = srcBE + 5;
841        ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
842        if (srcBeg != srcEnd) {
843            log_err("Did not consume whole buffer on first call.\n");
844        }
845
846        srcEnd = srcBE + 8;
847        ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
848        if (srcBeg != srcEnd) {
849            log_err("Did not consume whole buffer on second call.\n");
850        }
851
852        if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
853            log_err("Did not get expected results for UTF-32BE.\n");
854        }
855        ucnv_close(convBE);
856    }
857    {
858        static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 };
859        static const UChar expected[] = { 0x0031, 0x0030 };
860        UConverter *convLE;
861        UErrorCode err = U_ZERO_ERROR;
862        UChar pivotBuffer[100];
863        const UChar* const pivEnd = pivotBuffer + 100;
864        const char* srcBeg;
865        const char* srcEnd;
866        UChar* pivBeg;
867
868        convLE = ucnv_open("UTF-32LE", &err);
869
870        srcBeg = srcLE;
871        pivBeg = pivotBuffer;
872        srcEnd = srcLE + 5;
873        ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
874        if (srcBeg != srcEnd) {
875            log_err("Did not consume whole buffer on first call.\n");
876        }
877
878        srcEnd = srcLE + 8;
879        ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
880        if (srcBeg != srcEnd) {
881            log_err("Did not consume whole buffer on second call.\n");
882        }
883
884        if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
885            log_err("Did not get expected results for UTF-32LE.\n");
886        }
887        ucnv_close(convLE);
888    }
889}
890
891/*Walk through the available converters*/
892static void TestAvailableConverters(){
893    UErrorCode status=U_ZERO_ERROR;
894    UConverter *conv=NULL;
895    int32_t i=0;
896    for(i=0; i < ucnv_countAvailable(); i++){
897        status=U_ZERO_ERROR;
898        conv=ucnv_open(ucnv_getAvailableName(i), &status);
899        if(U_FAILURE(status)){
900            log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n",
901                        ucnv_getAvailableName(i), myErrorName(status));
902            continue;
903        }
904        ucnv_close(conv);
905    }
906
907}
908
909static void TestFlushInternalBuffer(){
910    TestWithBufferSize(MAX_LENGTH, 1);
911    TestWithBufferSize(1, 1);
912    TestWithBufferSize(1, MAX_LENGTH);
913    TestWithBufferSize(MAX_LENGTH, MAX_LENGTH);
914}
915
916static void TestWithBufferSize(int32_t insize, int32_t outsize){
917
918    gInBufferSize =insize;
919    gOutBufferSize = outsize;
920
921     log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
922    {
923        UChar    sampleText[] =
924            { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09,  0x002E  };
925        const uint8_t expectedUTF8[] =
926            { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
927        int32_t  toUTF8Offs[] =
928            { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
929       /* int32_t fmUTF8Offs[] =
930            { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/
931
932        /*UTF-8*/
933        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
934            expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE))
935             log_err("u-> UTF8 did not match.\n");
936    }
937
938#if !UCONFIG_NO_LEGACY_CONVERSION
939     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
940    {
941        UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
942        const uint8_t toIBM943[]= { 0x61,
943            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
944            0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
945            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
946            0x61 };
947        int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
948
949        if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
950                toIBM943, sizeof(toIBM943), "ibm-943",
951                (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE))
952            log_err("u-> ibm-943 with subst with value did not match.\n");
953    }
954#endif
955
956     log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
957    {
958        const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
959            0xe0, 0x80,  0x61};
960        UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0x0061};
961        int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0006};
962
963        if(!testConvertToU(sampleText1, sizeof(sampleText1),
964                 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE))
965            log_err("utf8->u with substitute did not match.\n");;
966    }
967
968#if !UCONFIG_NO_LEGACY_CONVERSION
969    log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
970    /*to Unicode*/
971    {
972        const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
973            0x81, 0xad, /*unassigned*/
974            0x89, 0xd3 };
975        UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
976            0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
977            0x7B87};
978        int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
979
980        if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU),
981                 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
982                (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE))
983            log_err("ibm-943->u with substitute with value did not match.\n");
984
985    }
986#endif
987}
988
989static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
990                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
991{
992
993    int32_t i=0;
994    char *p=0;
995    const UChar *src;
996    char buffer[MAX_LENGTH];
997    int32_t offsetBuffer[MAX_LENGTH];
998    int32_t *offs=0;
999    char *targ;
1000    char *targetLimit;
1001    UChar *sourceLimit=0;
1002    UErrorCode status = U_ZERO_ERROR;
1003    UConverter *conv = 0;
1004    conv = ucnv_open(codepage, &status);
1005    if(U_FAILURE(status))
1006    {
1007        log_data_err("Couldn't open converter %s\n",codepage);
1008        return TRUE;
1009    }
1010    log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
1011
1012    for(i=0; i<MAX_LENGTH; i++){
1013        buffer[i]=(char)0xF0;
1014        offsetBuffer[i]=0xFF;
1015    }
1016
1017    src=source;
1018    sourceLimit=(UChar*)src+(sourceLen);
1019    targ=buffer;
1020    targetLimit=targ+MAX_LENGTH;
1021    offs=offsetBuffer;
1022    ucnv_fromUnicode (conv,
1023                  (char **)&targ,
1024                  (const char *)targetLimit,
1025                  &src,
1026                  sourceLimit,
1027                  expectOffsets ? offs : NULL,
1028                  doFlush,
1029                  &status);
1030    ucnv_close(conv);
1031    if(status != expectedStatus){
1032          log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
1033          return FALSE;
1034    }
1035
1036    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1037        sourceLen, targ-buffer);
1038
1039    if(expectLen != targ-buffer)
1040    {
1041        log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
1042        log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
1043        printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer));
1044        printSeqErr((const unsigned char*)expect, expectLen);
1045        return FALSE;
1046    }
1047
1048    if(memcmp(buffer, expect, expectLen)){
1049        log_err("String does not match. FROM Unicode to codePage%s\n", codepage);
1050        log_info("\nGot:");
1051        printSeqErr((const unsigned char *)buffer, expectLen);
1052        log_info("\nExpected:");
1053        printSeqErr((const unsigned char *)expect, expectLen);
1054        return FALSE;
1055    }
1056    else {
1057        log_verbose("Matches!\n");
1058    }
1059
1060    if (expectOffsets != 0){
1061        log_verbose("comparing %d offsets..\n", targ-buffer);
1062        if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){
1063            log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage);
1064            log_info("\nGot  : ");
1065            printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer));
1066            for(p=buffer;p<targ;p++)
1067                log_info("%d, ", offsetBuffer[p-buffer]);
1068            log_info("\nExpected: ");
1069            for(i=0; i< (targ-buffer); i++)
1070                log_info("%d,", expectOffsets[i]);
1071        }
1072    }
1073
1074    return TRUE;
1075}
1076
1077
1078static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
1079               const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
1080{
1081    UErrorCode status = U_ZERO_ERROR;
1082    UConverter *conv = 0;
1083    int32_t i=0;
1084    UChar *p=0;
1085    const char* src;
1086    UChar buffer[MAX_LENGTH];
1087    int32_t offsetBuffer[MAX_LENGTH];
1088    int32_t *offs=0;
1089    UChar *targ;
1090    UChar *targetLimit;
1091    uint8_t *sourceLimit=0;
1092
1093
1094
1095    conv = ucnv_open(codepage, &status);
1096    if(U_FAILURE(status))
1097    {
1098        log_data_err("Couldn't open converter %s\n",codepage);
1099        return TRUE;
1100    }
1101    log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
1102
1103
1104
1105    for(i=0; i<MAX_LENGTH; i++){
1106        buffer[i]=0xFFFE;
1107        offsetBuffer[i]=-1;
1108    }
1109
1110    src=(const char *)source;
1111    sourceLimit=(uint8_t*)(src+(sourceLen));
1112    targ=buffer;
1113    targetLimit=targ+MAX_LENGTH;
1114    offs=offsetBuffer;
1115
1116
1117
1118    ucnv_toUnicode (conv,
1119                &targ,
1120                targetLimit,
1121                (const char **)&src,
1122                (const char *)sourceLimit,
1123                expectOffsets ? offs : NULL,
1124                doFlush,
1125                &status);
1126
1127    ucnv_close(conv);
1128    if(status != expectedStatus){
1129          log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
1130          return FALSE;
1131    }
1132    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1133        sourceLen, targ-buffer);
1134
1135
1136
1137
1138    log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2);
1139
1140    if (expectOffsets != 0) {
1141        if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){
1142
1143            log_err("did not get the expected offsets from %s To UNICODE\n", codepage);
1144            log_info("\nGot : ");
1145            for(p=buffer;p<targ;p++)
1146                log_info("%d, ", offsetBuffer[p-buffer]);
1147            log_info("\nExpected: ");
1148            for(i=0; i<(targ-buffer); i++)
1149                log_info("%d, ", expectOffsets[i]);
1150            log_info("\nGot result:");
1151            for(i=0; i<(targ-buffer); i++)
1152                log_info("0x%04X,", buffer[i]);
1153            log_info("\nFrom Input:");
1154            for(i=0; i<(src-(const char *)source); i++)
1155                log_info("0x%02X,", (unsigned char)source[i]);
1156            log_info("\n");
1157        }
1158    }
1159    if(memcmp(buffer, expect, expectLen*2)){
1160        log_err("String does not match. from codePage %s TO Unicode\n", codepage);
1161        log_info("\nGot:");
1162        printUSeqErr(buffer, expectLen);
1163        log_info("\nExpected:");
1164        printUSeqErr(expect, expectLen);
1165        return FALSE;
1166    }
1167    else {
1168        log_verbose("Matches!\n");
1169    }
1170
1171    return TRUE;
1172}
1173
1174
1175static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
1176                const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset)
1177{
1178    UErrorCode status = U_ZERO_ERROR;
1179    UConverter *conv = 0;
1180    char    junkout[MAX_LENGTH]; /* FIX */
1181    int32_t    junokout[MAX_LENGTH]; /* FIX */
1182    char *p;
1183    const UChar *src;
1184    char *end;
1185    char *targ;
1186    int32_t *offs;
1187    int i;
1188    int32_t   realBufferSize;
1189    char *realBufferEnd;
1190    const UChar *realSourceEnd;
1191    const UChar *sourceLimit;
1192    UBool checkOffsets = TRUE;
1193    UBool doFlush;
1194
1195    UConverterFromUCallback oldAction = NULL;
1196    const void* oldContext = NULL;
1197
1198    for(i=0;i<MAX_LENGTH;i++)
1199        junkout[i] = (char)0xF0;
1200    for(i=0;i<MAX_LENGTH;i++)
1201        junokout[i] = 0xFF;
1202
1203    setNuConvTestName(codepage, "FROM");
1204
1205    log_verbose("\n=========  %s\n", gNuConvTestName);
1206
1207    conv = ucnv_open(codepage, &status);
1208    if(U_FAILURE(status))
1209    {
1210        log_data_err("Couldn't open converter %s\n",codepage);
1211        return TRUE;
1212    }
1213
1214    log_verbose("Converter opened..\n");
1215    /*----setting the callback routine----*/
1216    ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1217    if (U_FAILURE(status)) {
1218        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1219    }
1220    /*------------------------*/
1221
1222    src = source;
1223    targ = junkout;
1224    offs = junokout;
1225
1226    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
1227    realBufferEnd = junkout + realBufferSize;
1228    realSourceEnd = source + sourceLen;
1229
1230    if ( gOutBufferSize != realBufferSize )
1231      checkOffsets = FALSE;
1232
1233    if( gInBufferSize != MAX_LENGTH )
1234      checkOffsets = FALSE;
1235
1236    do
1237    {
1238        end = nct_min(targ + gOutBufferSize, realBufferEnd);
1239        sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
1240
1241        doFlush = (UBool)(sourceLimit == realSourceEnd);
1242
1243        if(targ == realBufferEnd)
1244          {
1245        log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
1246        return FALSE;
1247          }
1248        log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
1249
1250
1251        status = U_ZERO_ERROR;
1252        if(gInBufferSize ==999 && gOutBufferSize==999)
1253            doFlush = FALSE;
1254        ucnv_fromUnicode (conv,
1255                  (char **)&targ,
1256                  (const char *)end,
1257                  &src,
1258                  sourceLimit,
1259                  offs,
1260                  doFlush, /* flush if we're at the end of the input data */
1261                  &status);
1262        if(testReset)
1263            ucnv_resetToUnicode(conv);
1264        if(gInBufferSize ==999 && gOutBufferSize==999)
1265            ucnv_resetToUnicode(conv);
1266
1267      } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
1268
1269    if(U_FAILURE(status)) {
1270        log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1271        return FALSE;
1272      }
1273
1274    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1275        sourceLen, targ-junkout);
1276    if(getTestOption(VERBOSITY_OPTION))
1277    {
1278        char junk[999];
1279        char offset_str[999];
1280        char *ptr;
1281
1282        junk[0] = 0;
1283        offset_str[0] = 0;
1284        for(ptr = junkout;ptr<targ;ptr++)
1285        {
1286            sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr);
1287            sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]);
1288        }
1289
1290        log_verbose(junk);
1291        printSeq((const unsigned char *)expect, expectLen);
1292        if ( checkOffsets )
1293          {
1294            log_verbose("\nOffsets:");
1295            log_verbose(offset_str);
1296          }
1297        log_verbose("\n");
1298    }
1299    ucnv_close(conv);
1300
1301
1302    if(expectLen != targ-junkout)
1303    {
1304        log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1305        log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1306        log_info("\nGot:");
1307        printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
1308        log_info("\nExpected:");
1309        printSeqErr((const unsigned char*)expect, expectLen);
1310        return FALSE;
1311    }
1312
1313    if (checkOffsets && (expectOffsets != 0) )
1314    {
1315        log_verbose("comparing %d offsets..\n", targ-junkout);
1316        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
1317            log_err("did not get the expected offsets. %s", gNuConvTestName);
1318            log_err("Got  : ");
1319            printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
1320            for(p=junkout;p<targ;p++)
1321                log_err("%d, ", junokout[p-junkout]);
1322            log_err("\nExpected: ");
1323            for(i=0; i<(targ-junkout); i++)
1324                log_err("%d,", expectOffsets[i]);
1325        }
1326    }
1327
1328    log_verbose("comparing..\n");
1329    if(!memcmp(junkout, expect, expectLen))
1330    {
1331        log_verbose("Matches!\n");
1332        return TRUE;
1333    }
1334    else
1335    {
1336        log_err("String does not match. %s\n", gNuConvTestName);
1337        printUSeqErr(source, sourceLen);
1338        log_info("\nGot:");
1339        printSeqErr((const unsigned char *)junkout, expectLen);
1340        log_info("\nExpected:");
1341        printSeqErr((const unsigned char *)expect, expectLen);
1342
1343        return FALSE;
1344    }
1345}
1346
1347static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
1348               const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset)
1349{
1350    UErrorCode status = U_ZERO_ERROR;
1351    UConverter *conv = 0;
1352    UChar    junkout[MAX_LENGTH]; /* FIX */
1353    int32_t    junokout[MAX_LENGTH]; /* FIX */
1354    const char *src;
1355    const char *realSourceEnd;
1356    const char *srcLimit;
1357    UChar *p;
1358    UChar *targ;
1359    UChar *end;
1360    int32_t *offs;
1361    int i;
1362    UBool   checkOffsets = TRUE;
1363    int32_t   realBufferSize;
1364    UChar *realBufferEnd;
1365    UBool doFlush;
1366
1367    UConverterToUCallback oldAction = NULL;
1368    const void* oldContext = NULL;
1369
1370
1371    for(i=0;i<MAX_LENGTH;i++)
1372        junkout[i] = 0xFFFE;
1373
1374    for(i=0;i<MAX_LENGTH;i++)
1375        junokout[i] = -1;
1376
1377    setNuConvTestName(codepage, "TO");
1378
1379    log_verbose("\n=========  %s\n", gNuConvTestName);
1380
1381    conv = ucnv_open(codepage, &status);
1382    if(U_FAILURE(status))
1383    {
1384        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
1385        return TRUE;
1386    }
1387
1388    log_verbose("Converter opened..\n");
1389     /*----setting the callback routine----*/
1390    ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1391    if (U_FAILURE(status)) {
1392        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1393    }
1394    /*-------------------------------------*/
1395
1396    src = (const char *)source;
1397    targ = junkout;
1398    offs = junokout;
1399
1400    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
1401    realBufferEnd = junkout + realBufferSize;
1402    realSourceEnd = src + sourcelen;
1403
1404    if ( gOutBufferSize != realBufferSize )
1405      checkOffsets = FALSE;
1406
1407    if( gInBufferSize != MAX_LENGTH )
1408      checkOffsets = FALSE;
1409
1410    do
1411      {
1412        end = nct_min( targ + gOutBufferSize, realBufferEnd);
1413        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
1414
1415        if(targ == realBufferEnd)
1416        {
1417            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
1418            return FALSE;
1419        }
1420        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
1421
1422        /* oldTarg = targ; */
1423
1424        status = U_ZERO_ERROR;
1425        doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE);
1426
1427        ucnv_toUnicode (conv,
1428                &targ,
1429                end,
1430                (const char **)&src,
1431                (const char *)srcLimit,
1432                offs,
1433                doFlush, /* flush if we're at the end of hte source data */
1434                &status);
1435        if(testReset)
1436            ucnv_resetFromUnicode(conv);
1437        if(gInBufferSize ==999 && gOutBufferSize==999)
1438            ucnv_resetToUnicode(conv);
1439        /*        offs += (targ-oldTarg); */
1440
1441      } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
1442
1443    if(U_FAILURE(status))
1444    {
1445        log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1446        return FALSE;
1447    }
1448
1449    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
1450        sourcelen, targ-junkout);
1451    if(getTestOption(VERBOSITY_OPTION))
1452    {
1453        char junk[999];
1454        char offset_str[999];
1455
1456        UChar *ptr;
1457
1458        junk[0] = 0;
1459        offset_str[0] = 0;
1460
1461        for(ptr = junkout;ptr<targ;ptr++)
1462        {
1463            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
1464            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
1465        }
1466
1467        log_verbose(junk);
1468
1469        if ( checkOffsets )
1470          {
1471            log_verbose("\nOffsets:");
1472            log_verbose(offset_str);
1473          }
1474        log_verbose("\n");
1475    }
1476    ucnv_close(conv);
1477
1478    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
1479
1480    if (checkOffsets && (expectOffsets != 0))
1481    {
1482        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
1483
1484            log_err("did not get the expected offsets. %s",gNuConvTestName);
1485            for(p=junkout;p<targ;p++)
1486                log_err("%d, ", junokout[p-junkout]);
1487            log_err("\nExpected: ");
1488            for(i=0; i<(targ-junkout); i++)
1489                log_err("%d,", expectOffsets[i]);
1490            log_err("");
1491            for(i=0; i<(targ-junkout); i++)
1492                log_err("%X,", junkout[i]);
1493            log_err("");
1494            for(i=0; i<(src-(const char *)source); i++)
1495                log_err("%X,", (unsigned char)source[i]);
1496        }
1497    }
1498
1499    if(!memcmp(junkout, expect, expectlen*2))
1500    {
1501        log_verbose("Matches!\n");
1502        return TRUE;
1503    }
1504    else
1505    {
1506        log_err("String does not match. %s\n", gNuConvTestName);
1507        log_verbose("String does not match. %s\n", gNuConvTestName);
1508        log_info("\nGot:");
1509        printUSeq(junkout, expectlen);
1510        log_info("\nExpected:");
1511        printUSeq(expect, expectlen);
1512        return FALSE;
1513    }
1514}
1515
1516
1517static void TestResetBehaviour(void){
1518#if !UCONFIG_NO_LEGACY_CONVERSION
1519    log_verbose("Testing Reset for DBCS and MBCS\n");
1520    {
1521        static const UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
1522        static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
1523        static const int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
1524
1525
1526        static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8};
1527        static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7};
1528        static const int32_t offsets1[] =  { 0,2,4,6};
1529
1530        /*DBCS*/
1531        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1532                expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1533            log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1534        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1535                expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1536            log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1537
1538        if(!testConvertToU(expected1, sizeof(expected1),
1539                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1540                offsets1, TRUE))
1541           log_err("ibm-1363 -> did not match.\n");
1542        /*MBCS*/
1543        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1544                expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1545            log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1546        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1547                expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1548            log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1549
1550        if(!testConvertToU(expected1, sizeof(expected1),
1551                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1552                offsets1, TRUE))
1553           log_err("ibm-1363 -> did not match.\n");
1554
1555    }
1556
1557    log_verbose("Testing Reset for ISO-2022-jp\n");
1558    {
1559        static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1560
1561        static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1562                                    0x31,0x1A, 0x32};
1563
1564
1565        static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
1566
1567
1568        static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1569        static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1570                                    0x31,0x1A, 0x32};
1571        static const int32_t offsets1[] =  { 3,5,10,11,12};
1572
1573        /*iso-2022-jp*/
1574        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1575                expected, sizeof(expected), "iso-2022-jp",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1576            log_err("u-> not match.\n");
1577        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1578                expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1579            log_err("u->  not match.\n");
1580
1581        if(!testConvertToU(expected1, sizeof(expected1),
1582                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1583                offsets1, TRUE))
1584           log_err("iso-2022-jp -> did not match.\n");
1585
1586    }
1587
1588    log_verbose("Testing Reset for ISO-2022-cn\n");
1589    {
1590        static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1591
1592        static const uint8_t expected[] = {
1593                                    0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1594                                    0x36, 0x21,
1595                                    0x0f, 0x31,
1596                                    0x1A,
1597                                    0x32
1598                                    };
1599
1600
1601        static const int32_t offsets[] = {
1602                                    0,    0,    0,    0,    0,    0,    0,
1603                                    1,    1,
1604                                    2,    2,
1605                                    3,
1606                                    5,  };
1607
1608        UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1609        static const uint8_t expected1[] = {
1610                                    0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1611                                    0x36, 0x21,
1612                                    0x1B, 0x24, 0x29, 0x47, 0x24, 0x22,
1613                                    0x0f, 0x1A,
1614                                    0x32
1615                                    };
1616        static const int32_t offsets1[] =  { 5,7,13,16,17};
1617
1618        /*iso-2022-CN*/
1619        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1620                expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1621            log_err("u-> not match.\n");
1622        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1623                expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1624            log_err("u-> not match.\n");
1625
1626        if(!testConvertToU(expected1, sizeof(expected1),
1627                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1628                offsets1, TRUE))
1629           log_err("iso-2022-cn -> did not match.\n");
1630    }
1631
1632        log_verbose("Testing Reset for ISO-2022-kr\n");
1633    {
1634        UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1635
1636        static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
1637                                    0x0E, 0x6C, 0x69,
1638                                    0x0f, 0x1A,
1639                                    0x0e, 0x6F, 0x4B,
1640                                    0x0F, 0x31,
1641                                    0x1A,
1642                                    0x32 };
1643
1644        static const int32_t offsets[] = {-1, -1, -1, -1,
1645                              0, 0, 0,
1646                              1, 1,
1647                              3, 3, 3,
1648                              4, 4,
1649                              5,
1650                              7,
1651                            };
1652        static const UChar    sampleText1[] =   { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032};
1653
1654        static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43,
1655                                    0x0E, 0x6C, 0x69,
1656                                    0x0f, 0x41,
1657                                    0x0e, 0x6F, 0x4B,
1658                                    0x0F, 0x31,
1659                                    0x42,
1660                                    0x32 };
1661
1662        static const int32_t offsets1[] = {
1663                              5, 8, 10,
1664                              13, 14, 15
1665
1666                            };
1667        /*iso-2022-kr*/
1668        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1669                expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1670            log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1671        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1672                expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1673            log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1674        if(!testConvertToU(expected1, sizeof(expected1),
1675                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1676                offsets1, TRUE))
1677           log_err("iso-2022-kr -> did not match.\n");
1678    }
1679
1680        log_verbose("Testing Reset for HZ\n");
1681    {
1682        static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1683
1684        static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
1685                                    0x7E, 0x7D, 0x1A,
1686                                    0x7E, 0x7B, 0x36, 0x21,
1687                                    0x7E, 0x7D, 0x31,
1688                                    0x1A,
1689                                    0x32 };
1690
1691
1692        static const int32_t offsets[] = {0,0,0,0,
1693                             1,1,1,
1694                             3,3,3,3,
1695                             4,4,4,
1696                             5,
1697                             7,};
1698        static const UChar    sampleText1[] =   { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032};
1699
1700        static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B,
1701                                    0x7E, 0x7D, 0x35,
1702                                    0x7E, 0x7B, 0x36, 0x21,
1703                                    0x7E, 0x7D, 0x31,
1704                                    0x41,
1705                                    0x32 };
1706
1707
1708        static const int32_t offsets1[] = {2,6,9,13,14,15
1709                            };
1710
1711        /*hz*/
1712        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1713                expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1714            log_err("u->  not match.\n");
1715        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1716                expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1717            log_err("u->  not match.\n");
1718        if(!testConvertToU(expected1, sizeof(expected1),
1719                sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1720                offsets1, TRUE))
1721           log_err("hz -> did not match.\n");
1722    }
1723#endif
1724
1725    /*UTF-8*/
1726     log_verbose("Testing for UTF8\n");
1727    {
1728        static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
1729        int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
1730                           0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
1731                           0x04, 0x06 };
1732        static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
1733            0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
1734
1735
1736        static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
1737        /*UTF-8*/
1738        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1739            expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1740            log_err("u-> UTF8 with offsets and flush true did not match.\n");
1741        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1742            expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1743            log_err("u-> UTF8 with offsets and flush true did not match.\n");
1744        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1745            expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
1746            log_err("u-> UTF8 with offsets and flush true did not match.\n");
1747        if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1748            expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
1749            log_err("u-> UTF8 with offsets and flush true did not match.\n");
1750        if(!testConvertToU(expected, sizeof(expected),
1751            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1752            log_err("UTF8 -> did not match.\n");
1753        if(!testConvertToU(expected, sizeof(expected),
1754            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
1755            log_err("UTF8 -> did not match.\n");
1756        if(!testConvertToU(expected, sizeof(expected),
1757            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
1758            log_err("UTF8 -> did not match.\n");
1759        if(!testConvertToU(expected, sizeof(expected),
1760            sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
1761            log_err("UTF8 -> did not match.\n");
1762
1763    }
1764
1765}
1766
1767/* Test that U_TRUNCATED_CHAR_FOUND is set. */
1768static void
1769doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
1770    UConverter *cnv;
1771
1772    UChar buffer[2];
1773    UChar *target, *targetLimit;
1774    const char *source, *sourceLimit;
1775
1776    UErrorCode errorCode;
1777
1778    errorCode=U_ZERO_ERROR;
1779    cnv=ucnv_open(cnvName, &errorCode);
1780    if(U_FAILURE(errorCode)) {
1781        log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode));
1782        return;
1783    }
1784    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
1785    if(U_FAILURE(errorCode)) {
1786        log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n",
1787                    cnvName, u_errorName(errorCode));
1788        ucnv_close(cnv);
1789        return;
1790    }
1791
1792    source=(const char *)bytes;
1793    sourceLimit=source+length;
1794    target=buffer;
1795    targetLimit=buffer+LENGTHOF(buffer);
1796
1797    /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */
1798    ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode);
1799    if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) {
1800        log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n",
1801                cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1802    }
1803
1804    errorCode=U_ZERO_ERROR;
1805    source=sourceLimit;
1806    target=buffer;
1807    ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
1808    if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
1809        log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n",
1810                cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer));
1811    }
1812
1813    /* 2. input bytes with flush=TRUE */
1814    ucnv_resetToUnicode(cnv);
1815
1816    errorCode=U_ZERO_ERROR;
1817    source=(const char *)bytes;
1818    target=buffer;
1819    ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
1820    if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) {
1821        log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n",
1822                cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1823    }
1824
1825
1826    ucnv_close(cnv);
1827}
1828
/*
 * Calls doTestTruncated() once for each row of a table that pairs a converter
 * name with a partial input byte sequence and its length.
 *
 * The Shift-JIS and ibm-939 rows are compiled only when legacy conversion is
 * available; all other rows are always present.
 */
static void
TestTruncated(void) {
    static const struct {
        const char *cnvName;
        uint8_t bytes[8]; /* partial input bytes resulting in no output */
        int32_t length;
    } testCases[]={
        { "IMAP-mailbox-name",  { 0x26 }, 1 }, /* & */
        { "IMAP-mailbox-name",  { 0x26, 0x42 }, 2 }, /* &B */
        { "IMAP-mailbox-name",  { 0x26, 0x42, 0x42 }, 3 }, /* &BB */
        { "IMAP-mailbox-name",  { 0x26, 0x41, 0x41 }, 3 }, /* &AA */

        { "UTF-7",      { 0x2b, 0x42 }, 2 }, /* +B */
        { "UTF-8",      { 0xd1 }, 1 },

        { "UTF-16BE",   { 0x4e }, 1 },
        { "UTF-16LE",   { 0x4e }, 1 },
        { "UTF-16",     { 0x4e }, 1 },
        { "UTF-16",     { 0xff }, 1 },
        { "UTF-16",     { 0xfe, 0xff, 0x4e }, 3 },

        { "UTF-32BE",   { 0, 0, 0x4e }, 3 },
        { "UTF-32LE",   { 0x4e }, 1 },
        { "UTF-32",     { 0, 0, 0x4e }, 3 },
        { "UTF-32",     { 0xff }, 1 },
        { "UTF-32",     { 0, 0, 0xfe, 0xff, 0 }, 5 },
        { "SCSU",       { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */

        { "BOCU-1",     { 0xd5 }, 1 },

#if !UCONFIG_NO_LEGACY_CONVERSION
        { "Shift-JIS",  { 0xe0 }, 1 },

        { "ibm-939",    { 0x0e, 0x41 }, 2 }, /* SO 0x41 */
#endif
    };
    int32_t i;

    for(i=0; i<LENGTHOF(testCases); ++i) {
        doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length);
    }
}
1873
1874typedef struct NameRange {
1875    const char *name;
1876    UChar32 start, end, start2, end2, notStart, notEnd;
1877} NameRange;
1878
1879static void
1880TestUnicodeSet() {
1881    UErrorCode errorCode;
1882    UConverter *cnv;
1883    USet *set;
1884    const char *name;
1885    int32_t i, count;
1886
1887    static const char *const completeSetNames[]={
1888        "UTF-7",
1889        "UTF-8",
1890        "UTF-16",
1891        "UTF-16BE",
1892        "UTF-16LE",
1893        "UTF-32",
1894        "UTF-32BE",
1895        "UTF-32LE",
1896        "SCSU",
1897        "BOCU-1",
1898        "CESU-8",
1899#if !UCONFIG_NO_LEGACY_CONVERSION
1900        "gb18030",
1901#endif
1902        "IMAP-mailbox-name"
1903    };
1904#if !UCONFIG_NO_LEGACY_CONVERSION
1905    static const char *const lmbcsNames[]={
1906        "LMBCS-1",
1907        "LMBCS-2",
1908        "LMBCS-3",
1909        "LMBCS-4",
1910        "LMBCS-5",
1911        "LMBCS-6",
1912        "LMBCS-8",
1913        "LMBCS-11",
1914        "LMBCS-16",
1915        "LMBCS-17",
1916        "LMBCS-18",
1917        "LMBCS-19"
1918    };
1919#endif
1920
1921    static const NameRange nameRanges[]={
1922        { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1923#if !UCONFIG_NO_LEGACY_CONVERSION
1924        { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1925#endif
1926        { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
1927#if !UCONFIG_NO_LEGACY_CONVERSION
1928        { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
1929        { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
1930        /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
1931        { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
1932#else
1933        { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
1934#endif
1935    };
1936
1937    /* open an empty set */
1938    set=uset_open(1, 0);
1939
1940    count=ucnv_countAvailable();
1941    for(i=0; i<count; ++i) {
1942        errorCode=U_ZERO_ERROR;
1943        name=ucnv_getAvailableName(i);
1944        cnv=ucnv_open(name, &errorCode);
1945        if(U_FAILURE(errorCode)) {
1946            log_data_err("error: unable to open converter %s - %s\n",
1947                    name, u_errorName(errorCode));
1948            continue;
1949        }
1950
1951        uset_clear(set);
1952        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
1953        if(U_FAILURE(errorCode)) {
1954            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
1955                    name, u_errorName(errorCode));
1956        } else if(uset_size(set)==0) {
1957            log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
1958        }
1959
1960        ucnv_close(cnv);
1961    }
1962
1963    /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
1964    for(i=0; i<LENGTHOF(completeSetNames); ++i) {
1965        errorCode=U_ZERO_ERROR;
1966        name=completeSetNames[i];
1967        cnv=ucnv_open(name, &errorCode);
1968        if(U_FAILURE(errorCode)) {
1969            log_data_err("error: unable to open converter %s - %s\n",
1970                    name, u_errorName(errorCode));
1971            continue;
1972        }
1973
1974        uset_clear(set);
1975        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
1976        if(U_FAILURE(errorCode)) {
1977            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
1978                    name, u_errorName(errorCode));
1979        } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
1980            log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
1981        }
1982
1983        ucnv_close(cnv);
1984    }
1985
1986#if !UCONFIG_NO_LEGACY_CONVERSION
1987    /* test LMBCS variants which convert all of Unicode except for U+F6xx */
1988    for(i=0; i<LENGTHOF(lmbcsNames); ++i) {
1989        errorCode=U_ZERO_ERROR;
1990        name=lmbcsNames[i];
1991        cnv=ucnv_open(name, &errorCode);
1992        if(U_FAILURE(errorCode)) {
1993            log_data_err("error: unable to open converter %s - %s\n",
1994                    name, u_errorName(errorCode));
1995            continue;
1996        }
1997
1998        uset_clear(set);
1999        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2000        if(U_FAILURE(errorCode)) {
2001            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2002                    name, u_errorName(errorCode));
2003        } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) {
2004            log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name);
2005        }
2006
2007        ucnv_close(cnv);
2008    }
2009#endif
2010
2011    /* test specific sets */
2012    for(i=0; i<LENGTHOF(nameRanges); ++i) {
2013        errorCode=U_ZERO_ERROR;
2014        name=nameRanges[i].name;
2015        cnv=ucnv_open(name, &errorCode);
2016        if(U_FAILURE(errorCode)) {
2017            log_data_err("error: unable to open converter %s - %s\n",
2018                         name, u_errorName(errorCode));
2019            continue;
2020        }
2021
2022        uset_clear(set);
2023        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2024        if(U_FAILURE(errorCode)) {
2025            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2026                    name, u_errorName(errorCode));
2027        } else if(
2028            !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
2029            (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
2030        ) {
2031            log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
2032        } else if(nameRanges[i].notStart>=0) {
2033            /* simulate containsAny() with the C API */
2034            uset_complement(set);
2035            if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
2036                log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
2037            }
2038        }
2039
2040        ucnv_close(cnv);
2041    }
2042
2043    errorCode = U_ZERO_ERROR;
2044    ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode);
2045    if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2046        log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
2047    }
2048    errorCode = U_PARSE_ERROR;
2049    /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */
2050    ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode);
2051    if (errorCode != U_PARSE_ERROR) {
2052        log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
2053    }
2054
2055    uset_close(set);
2056}
2057