1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*
7********************************************************************************
8* File NCCBTST.C
9*
10* Modification History:
11*        Name                            Description
12*    Madhu Katragadda     7/21/1999      Testing error callback routines
13********************************************************************************
14*/
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <ctype.h>
19#include "cstring.h"
20#include "unicode/uloc.h"
21#include "unicode/ucnv.h"
22#include "unicode/ucnv_err.h"
23#include "cintltst.h"
24#include "unicode/utypes.h"
25#include "unicode/ustring.h"
26#include "nccbtst.h"
27#include "unicode/ucnv_cb.h"
28#include "unicode/utf16.h"
29
/* Buffer size handed to the test drivers for the "large buffer" runs. */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Every use of an argument is parenthesized so that expressions containing
 * lower-precedence operators (e.g. `a & b`) are compared and returned as a
 * whole. Being a macro, the selected argument is evaluated twice: do not pass
 * expressions with side effects.
 */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))

/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Input/output buffer sizes of the current test run; reported by setNuConvTestName(). */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Description of the current test run, filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];
38
/*
 * Writes the first len bytes of a to the verbose log as a brace-enclosed,
 * comma-separated list of two-digit uppercase hex values.
 */
static void printSeq(const uint8_t* a, int len)
{
    int pos;

    log_verbose("\n{");
    for (pos = 0; pos < len; ++pos) {
        log_verbose("0x%02X, ", a[pos]);
    }
    log_verbose("}\n");
}
47
48static void printUSeq(const UChar* a, int len)
49{
50    int i=0;
51    log_verbose("{");
52    while (i<len)
53        log_verbose("  0x%04x, ", a[i++]);
54    log_verbose("}\n");
55}
56
/*
 * Writes the first len bytes of a to stderr as a brace-enclosed,
 * comma-separated list of two-digit hex values.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int pos;

    fprintf(stderr, "{");
    for (pos = 0; pos < len; ++pos) {
        fprintf(stderr, "  0x%02x, ", a[pos]);
    }
    fprintf(stderr, "}\n");
}
65
66static void printUSeqErr(const UChar* a, int len)
67{
68    int i=0;
69    fprintf(stderr, "{");
70    while (i<len)
71        fprintf(stderr, "0x%04x, ", a[i++]);
72    fprintf(stderr,"}\n");
73}
74
75static void setNuConvTestName(const char *codepage, const char *direction)
76{
77    sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
78            codepage,
79            direction,
80            (int)gInBufferSize,
81            (int)gOutBufferSize);
82}
83
84
85static void TestCallBackFailure(void);
86
87void addTestConvertErrorCallBack(TestNode** root);
88
89void addTestConvertErrorCallBack(TestNode** root)
90{
91    addTest(root, &TestSkipCallBack,  "tsconv/nccbtst/TestSkipCallBack");
92    addTest(root, &TestStopCallBack,  "tsconv/nccbtst/TestStopCallBack");
93    addTest(root, &TestSubCallBack,   "tsconv/nccbtst/TestSubCallBack");
94    /* BEGIN android-removed
95       To save space, Android does not build complete CJK conversion tables.
96       We skip the test here.
97    addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
98       END android-removed */
99
100#if !UCONFIG_NO_LEGACY_CONVERSION
101    addTest(root, &TestLegalAndOtherCallBack,  "tsconv/nccbtst/TestLegalAndOtherCallBack");
102    addTest(root, &TestSingleByteCallBack,  "tsconv/nccbtst/TestSingleByteCallBack");
103#endif
104
105    addTest(root, &TestCallBackFailure,  "tsconv/nccbtst/TestCallBackFailure");
106}
107
108static void TestSkipCallBack()
109{
110    TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
111    TestSkip(1,NEW_MAX_BUFFER);
112    TestSkip(1,1);
113    TestSkip(NEW_MAX_BUFFER, 1);
114}
115
116static void TestStopCallBack()
117{
118    TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
119    TestStop(1,NEW_MAX_BUFFER);
120    TestStop(1,1);
121    TestStop(NEW_MAX_BUFFER, 1);
122}
123
124static void TestSubCallBack()
125{
126    TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
127    TestSub(1,NEW_MAX_BUFFER);
128    TestSub(1,1);
129    TestSub(NEW_MAX_BUFFER, 1);
130
131#if !UCONFIG_NO_LEGACY_CONVERSION
132    TestEBCDIC_STATEFUL_Sub(1, 1);
133    TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
134    TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
135    TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
136#endif
137}
138
139static void TestSubWithValueCallBack()
140{
141    TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
142    TestSubWithValue(1,NEW_MAX_BUFFER);
143    TestSubWithValue(1,1);
144    TestSubWithValue(NEW_MAX_BUFFER, 1);
145}
146
147#if !UCONFIG_NO_LEGACY_CONVERSION
148static void TestLegalAndOtherCallBack()
149{
150    TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
151    TestLegalAndOthers(1,NEW_MAX_BUFFER);
152    TestLegalAndOthers(1,1);
153    TestLegalAndOthers(NEW_MAX_BUFFER, 1);
154}
155
156static void TestSingleByteCallBack()
157{
158    TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
159    TestSingleByte(1,NEW_MAX_BUFFER);
160    TestSingleByte(1,1);
161    TestSingleByte(NEW_MAX_BUFFER, 1);
162}
163#endif
164
165static void TestSkip(int32_t inputsize, int32_t outputsize)
166{
167    static const uint8_t expskipIBM_949[]= {
168        0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
169
170    static const uint8_t expskipIBM_943[] = {
171        0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
172
173    static const uint8_t expskipIBM_930[] = {
174        0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
175
176    gInBufferSize = inputsize;
177    gOutBufferSize = outputsize;
178
179    /*From Unicode*/
180    log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP  \n");
181
182#if !UCONFIG_NO_LEGACY_CONVERSION
183    {
184        static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
185        static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
186
187        static const int32_t  toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
188        static const int32_t  toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
189
190        if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
191                expskipIBM_949, sizeof(expskipIBM_949), "ibm-949",
192                UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
193            log_err("u-> ibm-949 with skip did not match.\n");
194        if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
195                expskipIBM_943, sizeof(expskipIBM_943), "ibm-943",
196                UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
197            log_err("u-> ibm-943 with skip did not match.\n");
198    }
199
200    {
201        static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
202        static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
203        static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
204
205        /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
206        if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR,
207                                   fromUBytes, sizeof(fromUBytes),
208                                   "ibm-930",
209                                   UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
210                                   NULL, 0)
211        ) {
212            log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
213        }
214    }
215#endif
216
217    {
218        static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
219        static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
220        static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
221
222        static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
223        static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
224        static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
225
226        /* US-ASCII */
227        if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
228                                   usasciiFromUBytes, sizeof(usasciiFromUBytes),
229                                   "US-ASCII",
230                                   UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
231                                   NULL, 0)
232        ) {
233            log_err("u->US-ASCII with skip did not match.\n");
234        }
235
236#if !UCONFIG_NO_LEGACY_CONVERSION
237        /* SBCS NLTC codepage 367 for US-ASCII */
238        if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
239                                   usasciiFromUBytes, sizeof(usasciiFromUBytes),
240                                   "ibm-367",
241                                   UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
242                                   NULL, 0)
243        ) {
244            log_err("u->ibm-367 with skip did not match.\n");
245        }
246#endif
247
248        /* ISO-Latin-1 */
249        if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
250                                   latin1FromUBytes, sizeof(latin1FromUBytes),
251                                   "LATIN_1",
252                                   UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
253                                   NULL, 0)
254        ) {
255            log_err("u->LATIN_1 with skip did not match.\n");
256        }
257
258#if !UCONFIG_NO_LEGACY_CONVERSION
259        /* windows-1252 */
260        if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
261                                   latin1FromUBytes, sizeof(latin1FromUBytes),
262                                   "windows-1252",
263                                   UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
264                                   NULL, 0)
265        ) {
266            log_err("u->windows-1252 with skip did not match.\n");
267        }
268    }
269
270    {
271        static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
272        static const uint8_t toIBM943[]= { 0x61, 0x61 };
273        static const int32_t offset[]= {0, 4};
274
275         /* EUC_JP*/
276        static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
277        static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
278            0x61, 0x8e, 0xe0,
279        };
280        static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
281
282        /*EUC_TW*/
283        static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
284        static const uint8_t to_euc_tw[]={
285            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
286            0x61, 0xe6, 0xca, 0x8a,
287        };
288        static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
289
290        /*ISO-2022-JP*/
291        static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
292        static const uint8_t to_iso_2022_jp[]={
293            0x41,
294            0x42,
295
296        };
297        static const int32_t from_iso_2022_jpOffs [] ={0,2};
298
299        /*ISO-2022-JP*/
300        UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
301        static const uint8_t to_iso_2022_jp2[]={
302            0x41,
303            0x43,
304
305        };
306        static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
307
308        /*ISO-2022-cn*/
309        static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
310        static const uint8_t to_iso_2022_cn[]={
311            0x41, 0x42
312        };
313        static const int32_t from_iso_2022_cnOffs [] ={
314            0, 2
315        };
316
317        /*ISO-2022-CN*/
318        static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
319        static const uint8_t to_iso_2022_cn1[]={
320            0x41, 0x43
321
322        };
323        static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
324
325        /*ISO-2022-kr*/
326        static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
327        static const uint8_t to_iso_2022_kr[]={
328            0x1b,   0x24,   0x29,   0x43,
329            0x41,
330            0x0e,   0x25,   0x50,
331            0x25,   0x50,
332            0x0f,   0x42,
333        };
334        static const int32_t from_iso_2022_krOffs [] ={
335            -1,-1,-1,-1,
336            0,
337            1,1,1,
338            3,3,
339            4,4
340        };
341
342        /*ISO-2022-kr*/
343        static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
344        static const uint8_t to_iso_2022_kr1[]={
345            0x1b,   0x24,   0x29,   0x43,
346            0x41,
347            0x0e,   0x25,   0x50,
348            0x25,   0x50,
349
350        };
351        static const int32_t from_iso_2022_krOffs1 [] ={
352            -1,-1,-1,-1,
353            0,
354            1,1,1,
355            3,3,
356
357        };
358        /* HZ encoding */
359        static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
360
361        static const uint8_t to_hz[]={
362            0x7e,   0x7d,   0x41,
363            0x7e,   0x7b,   0x26,   0x30,
364            0x26,   0x30,
365            0x7e,   0x7d,   0x42,
366
367        };
368        static const int32_t from_hzOffs [] ={
369            0,0,0,
370            1,1,1,1,
371            3,3,
372            4,4,4,4
373        };
374
375        static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
376
377        static const uint8_t to_hz1[]={
378            0x7e,   0x7d,   0x41,
379            0x7e,   0x7b,   0x26,   0x30,
380            0x26,   0x30,
381
382
383        };
384        static const int32_t from_hzOffs1 [] ={
385            0,0,0,
386            1,1,1,1,
387            3,3,
388
389        };
390
391#endif
392
393        static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
394
395        static const uint8_t to_SCSU[]={
396            0x41,
397            0x42
398
399
400        };
401        static const int32_t from_SCSUOffs [] ={
402            0,
403            2,
404
405        };
406
407#if !UCONFIG_NO_LEGACY_CONVERSION
408        /* ISCII */
409        static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
410        static const uint8_t to_iscii[]={
411            0x41,
412            0x42,
413        };
414        static const int32_t from_isciiOffs [] ={
415            0,2,
416
417        };
418        /*ISCII*/
419        static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
420        static const uint8_t to_iscii1[]={
421            0x44,
422            0x43,
423
424        };
425        static const int32_t from_isciiOffs1 [] ={0,2};
426
427        if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
428                toIBM943, sizeof(toIBM943), "ibm-943",
429                UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
430            log_err("u-> ibm-943 with skip did not match.\n");
431
432        if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
433                to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
434                UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
435            log_err("u-> euc-jp with skip did not match.\n");
436
437        if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
438                to_euc_tw, sizeof(to_euc_tw), "euc-tw",
439                UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
440            log_err("u-> euc-tw with skip did not match.\n");
441
442        /*iso_2022_jp*/
443        if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
444                to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
445                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
446            log_err("u-> iso-2022-jp with skip did not match.\n");
447
448        /* with context */
449        if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
450                to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
451                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
452            log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
453
454        /*iso_2022_cn*/
455        if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
456                to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
457                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
458            log_err("u-> iso-2022-cn with skip did not match.\n");
459        /*with context*/
460        if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]),
461                to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn",
462                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
463            log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
464
465        /*iso_2022_kr*/
466        if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
467                to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
468                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
469            log_err("u-> iso-2022-kr with skip did not match.\n");
470          /*with context*/
471        if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]),
472                to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr",
473                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
474            log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
475
476        /*hz*/
477        if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
478                to_hz, sizeof(to_hz), "HZ",
479                UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
480            log_err("u-> HZ with skip did not match.\n");
481          /*with context*/
482        if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]),
483                to_hz1, sizeof(to_hz1), "hz",
484                UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
485            log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
486#endif
487
488        /*SCSU*/
489        if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
490                to_SCSU, sizeof(to_SCSU), "SCSU",
491                UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
492            log_err("u-> SCSU with skip did not match.\n");
493
494#if !UCONFIG_NO_LEGACY_CONVERSION
495        /*ISCII*/
496        if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
497                to_iscii, sizeof(to_iscii), "ISCII,version=0",
498                UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
499            log_err("u-> iscii with skip did not match.\n");
500        /*with context*/
501        if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]),
502                to_iscii1, sizeof(to_iscii1), "ISCII,version=0",
503                UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
504            log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
505#endif
506    }
507
508    log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
509    {
510        static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
511            0xFB, 0xEE, 0x28,       /* from source offset 0 */
512            0x24, 0x1E, 0x52,
513            0xB2,
514            0x20,
515            0xB3,
516            0xB1,
517            0x0D,
518            0x0A,
519
520            0x20,                   /* from 8 */
521            0x00,
522            0xD0, 0x6C,
523            0xB6,
524            0xD8, 0xA5,
525            0x20,
526            0x68,
527            0x59,
528
529            0xF9, 0x28,             /* from 16 */
530            0x6D,
531            0x20,
532            0x73,
533            0xE0, 0x2D,
534            0xDE, 0x43,
535            0xD0, 0x33,
536            0x20,
537
538            0xFA, 0x83,             /* from 24 */
539            0x25, 0x01,
540            0xFB, 0x16, 0x87,
541            0x4B, 0x16,
542            0x20,
543            0xE6, 0xBD,
544            0xEB, 0x5B,
545            0x4B, 0xCC,
546
547            0xF9, 0xA2,             /* from 32 */
548            0xFC, 0x10, 0x3E,
549            0xFE, 0x16, 0x3A, 0x8C,
550            0x20,
551            0xFC, 0x03, 0xAC,
552
553            0x01,                   /* from 41 */
554            0xDE, 0x83,
555            0x20,
556            0x09
557        };
558        static const UChar expected[]={
559            0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
560            0x0063, 0x0061, 0x000D, 0x000A,
561
562            0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
563            0x0930, 0x0020, 0x0918, 0x0909,
564
565            0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
566            0x4000, 0x4E00, 0x7777, 0x0020,
567
568            0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
569            0x0020, 0xD7A3, 0xDC00, 0xD800,
570
571            0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
572            0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
573
574            0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
575            0x0009
576        };
577        static const int32_t offsets[]={
578            0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
579            8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
580            16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
581            24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
582            32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
583            41, 42, 42, 43, 44
584        };
585
586        /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
587        if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
588                                 sampleText, sizeof(sampleText),
589                                 "BOCU-1",
590                                 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
591        ) {
592            log_err("u->BOCU-1 with skip did not match.\n");
593        }
594    }
595
596    log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
597    {
598        const uint8_t sampleText[]={
599            0x61,                               /* 'a' */
600            0xc4, 0xb5,                         /* U+0135 */
601            0xed, 0x80, 0xa0,                   /* Hangul U+d020 */
602            0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
603            0xee, 0x80, 0x80,                   /* PUA U+e000 */
604            0xed, 0xb0, 0x81,                   /* unpaired trail surrogate U+dc01 */
605            0x62,                               /* 'b' */
606            0xed, 0xa0, 0x81,                   /* unpaired lead surrogate U+d801 */
607            0xd0, 0x80                          /* U+0400 */
608        };
609        UChar expected[]={
610            0x0061,
611            0x0135,
612            0xd020,
613            0xd801, 0xdc01,
614            0xe000,
615            0xdc01,
616            0x0062,
617            0xd801,
618            0x0400
619        };
620        int32_t offsets[]={
621            0,
622            1, 1,
623            2, 2, 2,
624            3, 3, 3, 4, 4, 4,
625            5, 5, 5,
626            6, 6, 6,
627            7,
628            8, 8, 8,
629            9, 9
630        };
631
632        /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
633
634        /* without offsets */
635        if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
636                                 sampleText, sizeof(sampleText),
637                                 "CESU-8",
638                                 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
639        ) {
640            log_err("u->CESU-8 with skip did not match.\n");
641        }
642
643        /* with offsets */
644        if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
645                                 sampleText, sizeof(sampleText),
646                                 "CESU-8",
647                                 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
648        ) {
649            log_err("u->CESU-8 with skip did not match.\n");
650        }
651    }
652
653    /*to Unicode*/
654    log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP  \n");
655
656#if !UCONFIG_NO_LEGACY_CONVERSION
657    {
658
659        static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
660        static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
661        static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
662
663        static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5};
664        static const int32_t  fromIBM943Offs [] = { 0, 2, 4};
665        static const int32_t  fromIBM930Offs [] = { 1, 3, 5};
666
667        if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949),
668                 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949",
669                UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
670            log_err("ibm-949->u with skip did not match.\n");
671        if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943),
672                 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943",
673                UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
674            log_err("ibm-943->u with skip did not match.\n");
675
676
677        if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930),
678                 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
679                UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
680            log_err("ibm-930->u with skip did not match.\n");
681
682
683        if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930),
684                 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
685                UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
686            log_err("ibm-930->u with skip did not match.\n");
687    }
688#endif
689
690    {
691        static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
692        static const UChar usasciiToU[] = { 0x61, 0x31 };
693        static const int32_t usasciiToUOffsets[] = { 0, 2 };
694
695        static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
696        static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
697        static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
698
699        /* US-ASCII */
700        if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
701                                 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
702                                 "US-ASCII",
703                                 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
704                                 NULL, 0)
705        ) {
706            log_err("US-ASCII->u with skip did not match.\n");
707        }
708
709#if !UCONFIG_NO_LEGACY_CONVERSION
710        /* SBCS NLTC codepage 367 for US-ASCII */
711        if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
712                                 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
713                                 "ibm-367",
714                                 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
715                                 NULL, 0)
716        ) {
717            log_err("ibm-367->u with skip did not match.\n");
718        }
719#endif
720
721        /* ISO-Latin-1 */
722        if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
723                                 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
724                                 "LATIN_1",
725                                 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
726                                 NULL, 0)
727        ) {
728            log_err("LATIN_1->u with skip did not match.\n");
729        }
730
731#if !UCONFIG_NO_LEGACY_CONVERSION
732        /* windows-1252 */
733        if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
734                                 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
735                                 "windows-1252",
736                                 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
737                                 NULL, 0)
738        ) {
739            log_err("windows-1252->u with skip did not match.\n");
740        }
741#endif
742    }
743
744#if !UCONFIG_NO_LEGACY_CONVERSION
745    {
746        static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
747            0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
748        };
749        static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0x03b4
750        };
751        static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
752
753
754         /* euc-jp*/
755        static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
756            0x8f, 0xda, 0xa1,  /*unassigned*/
757           0x8e, 0xe0,
758        };
759        static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
760        static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
761
762         /*EUC_TW*/
763        static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
764            0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
765           0xe6, 0xca, 0x8a,
766        };
767        static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
768        static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
769                /*iso-2022-jp*/
770        static const uint8_t sampleTxt_iso_2022_jp[]={
771            0x41,
772            0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
773            0x1b,   0x28,   0x42,   0x42,
774
775        };
776        static const UChar iso_2022_jptoUnicode[]={    0x41,0x42 };
777        static const int32_t from_iso_2022_jpOffs [] ={  0,9   };
778
779        /*iso-2022-cn*/
780        static const uint8_t sampleTxt_iso_2022_cn[]={
781            0x0f,   0x41,   0x44,
782            0x1B,   0x24,   0x29,   0x47,
783            0x0E,   0x40,   0x6f, /*unassigned*/
784            0x0f,   0x42,
785
786        };
787
788        static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x42 };
789        static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   11   };
790
791        /*iso-2022-kr*/
792        static const uint8_t sampleTxt_iso_2022_kr[]={
793          0x1b, 0x24, 0x29,  0x43,
794          0x41,
795          0x0E, 0x7f, 0x1E,
796          0x0e, 0x25, 0x50,
797          0x0f, 0x51,
798          0x42, 0x43,
799
800        };
801        static const UChar iso_2022_krtoUnicode[]={     0x41,0x03A0,0x51, 0x42,0x43};
802        static const int32_t from_iso_2022_krOffs [] ={  4,    9,    12,   13  , 14 };
803
804        /*hz*/
805        static const uint8_t sampleTxt_hz[]={
806            0x41,
807            0x7e,   0x7b,   0x26,   0x30,
808            0x7f,   0x1E, /*unassigned*/
809            0x26,   0x30,
810            0x7e,   0x7d,   0x42,
811            0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
812            0x7e,   0x7d,   0x42,
813        };
814        static const UChar hztoUnicode[]={
815            0x41,
816            0x03a0,
817            0x03A0,
818            0x42,
819            0x42,};
820
821        static const int32_t from_hzOffs [] ={0,3,7,11,18,  };
822
823        /*ISCII*/
824        static const uint8_t sampleTxt_iscii[]={
825            0x41,
826            0xa1,
827            0xEB,    /*unassigned*/
828            0x26,
829            0x30,
830            0xa2,
831            0xEC,    /*unassigned*/
832            0x42,
833        };
834        static const UChar isciitoUnicode[]={
835            0x41,
836            0x0901,
837            0x26,
838            0x30,
839            0x0902,
840            0x42,
841            };
842
843        static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
844
845        /*LMBCS*/
846        static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
847            0x12, 0x92, 0xa0, /*unassigned*/
848            0x12, 0x92, 0xA1,
849        };
850        static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
851        static const int32_t fromLMBCS[] = {0, 6};
852
853        if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
854             EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
855            UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
856        log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
857
858        if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
859             EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
860            UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
861        log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
862
863        if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
864                 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
865                UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
866            log_err("euc-jp->u with skip did not match.\n");
867
868
869
870        if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
871                 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
872                UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
873            log_err("euc-tw->u with skip did not match.\n");
874
875
876        if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
877                 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
878                UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
879            log_err("iso-2022-jp->u with skip did not match.\n");
880
881        if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
882                 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
883                UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
884            log_err("iso-2022-cn->u with skip did not match.\n");
885
886        if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
887                 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
888                UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
889            log_err("iso-2022-kr->u with skip did not match.\n");
890
891        if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
892                 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
893                UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
894            log_err("HZ->u with skip did not match.\n");
895
896        if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
897                 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
898                UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
899            log_err("iscii->u with skip did not match.\n");
900
901        if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS),
902                LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1",
903                UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
904            log_err("LMBCS->u with skip did not match.\n");
905
906    }
907#endif
908
909    log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
910    {
911        const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
912            0xe0, 0x80,  0x61,};
913        UChar    expected1[] = {  0x0031, 0x4e8c, 0x0061};
914        int32_t offsets1[] = {   0x0000, 0x0001, 0x0006};
915
916        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
917                 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
918                UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
919            log_err("utf8->u with skip did not match.\n");;
920    }
921
922    log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
923    {
924        const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
925        UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffe,0xfffe};
926        int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
927
928        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
929                 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
930                UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
931            log_err("scsu->u with skip did not match.\n");
932    }
933
934    log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
935    {
936        const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
937            0xFB, 0xEE, 0x28,       /* single-code point sequence at offset 0 */
938            0x24, 0x1E, 0x52,       /* 3 */
939            0xB2,                   /* 6 */
940            0x20,                   /* 7 */
941            0x40, 0x07,             /* 8 - wrong trail byte */
942            0xB3,                   /* 10 */
943            0xB1,                   /* 11 */
944            0xD0, 0x20,             /* 12 - wrong trail byte */
945            0x0D,                   /* 14 */
946            0x0A,                   /* 15 */
947            0x20,                   /* 16 */
948            0x00,                   /* 17 */
949            0xD0, 0x6C,             /* 18 */
950            0xB6,                   /* 20 */
951            0xD8, 0xA5,             /* 21 */
952            0x20,                   /* 23 */
953            0x68,                   /* 24 */
954            0x59,                   /* 25 */
955            0xF9, 0x28,             /* 26 */
956            0x6D,                   /* 28 */
957            0x20,                   /* 29 */
958            0x73,                   /* 30 */
959            0xE0, 0x2D,             /* 31 */
960            0xDE, 0x43,             /* 33 */
961            0xD0, 0x33,             /* 35 */
962            0x20,                   /* 37 */
963            0xFA, 0x83,             /* 38 */
964            0x25, 0x01,             /* 40 */
965            0xFB, 0x16, 0x87,       /* 42 */
966            0x4B, 0x16,             /* 45 */
967            0x20,                   /* 47 */
968            0xE6, 0xBD,             /* 48 */
969            0xEB, 0x5B,             /* 50 */
970            0x4B, 0xCC,             /* 52 */
971            0xF9, 0xA2,             /* 54 */
972            0xFC, 0x10, 0x3E,       /* 56 */
973            0xFE, 0x16, 0x3A, 0x8C, /* 59 */
974            0x20,                   /* 63 */
975            0xFC, 0x03, 0xAC,       /* 64 */
976            0xFF,                   /* 67 - FF just resets the state without encoding anything */
977            0x01,                   /* 68 */
978            0xDE, 0x83,             /* 69 */
979            0x20,                   /* 71 */
980            0x09                    /* 72 */
981        };
982        UChar expected[]={
983            0xFEFF, 0x0061, 0x0062, 0x0020,
984            0x0063, 0x0061, 0x000D, 0x000A,
985            0x0020, 0x0000, 0x00DF, 0x00E6,
986            0x0930, 0x0020, 0x0918, 0x0909,
987            0x3086, 0x304D, 0x0020, 0x3053,
988            0x4000, 0x4E00, 0x7777, 0x0020,
989            0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
990            0x0020, 0xD7A3, 0xDC00, 0xD800,
991            0xD800, 0xDC00, 0xD845, 0xDDDD,
992            0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
993            0xDFFF, 0x0001, 0x0E40, 0x0020,
994            0x0009
995        };
996        int32_t offsets[]={
997            0, 3, 6, 7, /* skip 8, */
998            10, 11, /* skip 12, */
999            14, 15, 16, 17, 18,
1000            20, 21, 23, 24, 25, 26, 28, 29,
1001            30, 31, 33, 35, 37, 38,
1002            40, 42, 45, 47, 48,
1003            50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1004            63, 64, /* trail */ 64, /* reset only 67, */
1005            68, 69,
1006            71, 72
1007        };
1008
1009        if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1010                                 expected, ARRAY_LENGTH(expected), "BOCU-1",
1011                                 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1012        ) {
1013            log_err("BOCU-1->u with skip did not match.\n");
1014        }
1015    }
1016
1017    log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1018    {
1019        const uint8_t sampleText[]={
1020            0x61,                               /* 0  'a' */
1021            0xc0, 0x80,                         /* 1  non-shortest form */
1022            0xc4, 0xb5,                         /* 3  U+0135 */
1023            0xed, 0x80, 0xa0,                   /* 5  Hangul U+d020 */
1024            0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8  surrogate pair for U+10401 */
1025            0xee, 0x80, 0x80,                   /* 14 PUA U+e000 */
1026            0xed, 0xb0, 0x81,                   /* 17 unpaired trail surrogate U+dc01 */
1027            0xf0, 0x90, 0x80, 0x80,             /* 20 illegal 4-byte form for U+10000 */
1028            0x62,                               /* 24 'b' */
1029            0xed, 0xa0, 0x81,                   /* 25 unpaired lead surrogate U+d801 */
1030            0xed, 0xa0,                         /* 28 incomplete sequence */
1031            0xd0, 0x80                          /* 30 U+0400 */
1032        };
1033        UChar expected[]={
1034            0x0061,
1035            /* skip */
1036            0x0135,
1037            0xd020,
1038            0xd801, 0xdc01,
1039            0xe000,
1040            0xdc01,
1041            /* skip */
1042            0x0062,
1043            0xd801,
1044            0x0400
1045        };
1046        int32_t offsets[]={
1047            0,
1048            /* skip 1, */
1049            3,
1050            5,
1051            8, 11,
1052            14,
1053            17,
1054            /* skip 20, 20, */
1055            24,
1056            25,
1057            /* skip 28 */
1058            30
1059        };
1060
1061        /* without offsets */
1062        if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1063                                 expected, ARRAY_LENGTH(expected), "CESU-8",
1064                                 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1065        ) {
1066            log_err("CESU-8->u with skip did not match.\n");
1067        }
1068
1069        /* with offsets */
1070        if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1071                                 expected, ARRAY_LENGTH(expected), "CESU-8",
1072                                 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1073        ) {
1074            log_err("CESU-8->u with skip did not match.\n");
1075        }
1076    }
1077}
1078
1079static void TestStop(int32_t inputsize, int32_t outputsize)
1080{
1081    static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1082    static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1083
1084    static const uint8_t expstopIBM_949[]= {
1085        0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1086
1087    static const uint8_t expstopIBM_943[] = {
1088        0x9f, 0xaf, 0x9f, 0xb1};
1089
1090    static const uint8_t expstopIBM_930[] = {
1091        0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1092
1093    static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1094    static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1095    static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1096
1097
1098    static const int32_t  toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1099    static const int32_t  toIBM943Offsstop [] = { 0, 0, 1, 1};
1100    static const int32_t  toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1101
1102    static const int32_t  fromIBM949Offs [] = { 0, 1, 3};
1103    static const int32_t  fromIBM943Offs [] = { 0, 2};
1104    static const int32_t  fromIBM930Offs [] = { 1, 3};
1105
1106    gInBufferSize = inputsize;
1107    gOutBufferSize = outputsize;
1108
1109    /*From Unicode*/
1110
1111#if !UCONFIG_NO_LEGACY_CONVERSION
1112    if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1113            expstopIBM_949, sizeof(expstopIBM_949), "ibm-949",
1114            UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1115        log_err("u-> ibm-949 with stop did not match.\n");
1116    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1117            expstopIBM_943, sizeof(expstopIBM_943), "ibm-943",
1118            UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1119        log_err("u-> ibm-943 with stop did not match.\n");
1120    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1121            expstopIBM_930, sizeof(expstopIBM_930), "ibm-930",
1122            UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1123        log_err("u-> ibm-930 with stop did not match.\n");
1124
1125    log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP  \n");
1126    {
1127        static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1128        static const uint8_t toIBM943[]= { 0x61,};
1129        static const int32_t offset[]= {0,} ;
1130
1131         /*EUC_JP*/
1132        static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1133        static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1134        static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1135
1136        /*EUC_TW*/
1137        static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1138        static const uint8_t to_euc_tw[]={
1139            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1140        static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1141
1142        /*ISO-2022-JP*/
1143        static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1144        static const uint8_t to_iso_2022_jp[]={
1145             0x41,
1146
1147        };
1148        static const int32_t from_iso_2022_jpOffs [] ={0,};
1149
1150        /*ISO-2022-cn*/
1151        static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1152        static const uint8_t to_iso_2022_cn[]={
1153            0x41,
1154
1155        };
1156        static const int32_t from_iso_2022_cnOffs [] ={
1157            0,0,
1158            2,2,
1159        };
1160
1161        /*ISO-2022-kr*/
1162        static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1163        static const uint8_t to_iso_2022_kr[]={
1164            0x1b,   0x24,   0x29,   0x43,
1165            0x41,
1166            0x0e,   0x25,   0x50,
1167        };
1168        static const int32_t from_iso_2022_krOffs [] ={
1169            -1,-1,-1,-1,
1170             0,
1171            1,1,1,
1172        };
1173
1174        /* HZ encoding */
1175        static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1176
1177        static const uint8_t to_hz[]={
1178            0x7e,   0x7d, 0x41,
1179            0x7e,   0x7b,   0x26,   0x30,
1180
1181        };
1182        static const int32_t from_hzOffs [] ={
1183            0, 0,0,
1184            1,1,1,1,
1185        };
1186
1187        /*ISCII*/
1188        static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1189        static const uint8_t to_iscii[]={
1190            0x41,
1191        };
1192        static const int32_t from_isciiOffs [] ={
1193            0,
1194        };
1195
1196        if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1197                toIBM943, sizeof(toIBM943), "ibm-943",
1198                UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1199            log_err("u-> ibm-943 with stop did not match.\n");
1200
1201        if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1202                to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
1203                UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1204            log_err("u-> euc-jp with stop did not match.\n");
1205
1206        if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1207                to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1208                UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1209            log_err("u-> euc-tw with stop did not match.\n");
1210
1211        if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1212                to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1213                UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1214            log_err("u-> iso-2022-jp with stop did not match.\n");
1215
1216        if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1217                to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1218                UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1219            log_err("u-> iso-2022-jp with stop did not match.\n");
1220
1221        if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
1222                to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
1223                UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1224            log_err("u-> iso-2022-cn with stop did not match.\n");
1225
1226        if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
1227                to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
1228                UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1229            log_err("u-> iso-2022-kr with stop did not match.\n");
1230
1231        if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
1232                to_hz, sizeof(to_hz), "HZ",
1233                UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1234            log_err("u-> HZ with stop did not match.\n");\
1235
1236        if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
1237                to_iscii, sizeof(to_iscii), "ISCII,version=0",
1238                UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1239            log_err("u-> iscii with stop did not match.\n");
1240
1241
1242    }
1243#endif
1244
1245    log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1246    {
1247        static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1248
1249        static const uint8_t to_SCSU[]={
1250            0x41,
1251
1252        };
1253        int32_t from_SCSUOffs [] ={
1254            0,
1255
1256        };
1257        if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1258                to_SCSU, sizeof(to_SCSU), "SCSU",
1259                UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1260            log_err("u-> SCSU with skip did not match.\n");
1261
1262    }
1263
1264    /*to Unicode*/
1265
1266#if !UCONFIG_NO_LEGACY_CONVERSION
1267    if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949),
1268             IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949",
1269            UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1270        log_err("ibm-949->u with stop did not match.\n");
1271    if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943),
1272             IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943",
1273            UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1274        log_err("ibm-943->u with stop did not match.\n");
1275    if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930),
1276             IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930",
1277            UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1278        log_err("ibm-930->u with stop did not match.\n");
1279
1280    log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1281    {
1282
1283        static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1284            0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1285        };
1286        static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63 };
1287        static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1288
1289
1290         /*EUC-JP*/
1291        static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1292            0x8f, 0xda, 0xa1,  /*unassigned*/
1293           0x8e, 0xe0,
1294        };
1295        static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1296        static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1297
1298          /*EUC_TW*/
1299        static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1300            0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1301           0xe6, 0xca, 0x8a,
1302        };
1303        UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1304        int32_t from_euc_twOffs [] ={ 0, 1, 3};
1305
1306
1307
1308         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1309             EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1310            UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1311        log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1312
1313        if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1314             euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
1315            UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1316        log_err("euc-jp->u with stop did not match.\n");
1317
1318        if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1319                 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1320                UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1321            log_err("euc-tw->u with stop did not match.\n");
1322    }
1323#endif
1324
1325    log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1326    {
1327        static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1328            0xe0, 0x80,  0x61,};
1329        static const UChar    expected1[] = {  0x0031, 0x4e8c,};
1330        static const int32_t offsets1[] = {   0x0000, 0x0001};
1331
1332        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1333                 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1334                UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1335            log_err("utf8->u with stop did not match.\n");;
1336    }
1337    log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1338    {
1339        static const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1340        static const UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061};
1341        static const int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003};
1342
1343        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1344                 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1345                UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1346            log_err("scsu->u with stop did not match.\n");;
1347    }
1348
1349}
1350
1351static void TestSub(int32_t inputsize, int32_t outputsize)
1352{
1353    static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1354    static const UChar sampleText2[]=    { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1355
1356    static const uint8_t expsubIBM_949[] =
1357     { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1358
1359    static const uint8_t expsubIBM_943[] = {
1360        0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1361
1362    static const uint8_t expsubIBM_930[] = {
1363        0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1364
1365    static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1366    static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1367    static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1368
1369    static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1370    static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1371    static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1372
1373    static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1374    static const int32_t  fromIBM943Offs [] = { 0, 2, 4, 6 };
1375    static const int32_t  fromIBM930Offs [] = { 1, 3, 5, 7 };
1376
1377    gInBufferSize = inputsize;
1378    gOutBufferSize = outputsize;
1379
1380    /*from unicode*/
1381
1382#if !UCONFIG_NO_LEGACY_CONVERSION
1383    if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1384            expsubIBM_949, sizeof(expsubIBM_949), "ibm-949",
1385            UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1386        log_err("u-> ibm-949 with subst did not match.\n");
1387    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1388            expsubIBM_943, sizeof(expsubIBM_943), "ibm-943",
1389            UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1390        log_err("u-> ibm-943 with subst did not match.\n");
1391    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1392            expsubIBM_930, sizeof(expsubIBM_930), "ibm-930",
1393            UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1394        log_err("u-> ibm-930 with subst did not match.\n");
1395
1396    log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE  \n");
1397    {
1398        static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1399        static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1400        static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1401
1402
1403        /* EUC_JP*/
1404        static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1405        static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1406            0xf4, 0xfe, 0xf4, 0xfe,
1407            0x61, 0x8e, 0xe0,
1408        };
1409        static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1410
1411        /*EUC_TW*/
1412        static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1413        static const uint8_t to_euc_tw[]={
1414            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1415            0xfd, 0xfe, 0xfd, 0xfe,
1416            0x61, 0xe6, 0xca, 0x8a,
1417        };
1418
1419        static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1420
1421        if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1422                toIBM943, sizeof(toIBM943), "ibm-943",
1423                UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1424            log_err("u-> ibm-943 with substitute did not match.\n");
1425
1426        if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1427                to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
1428                UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1429            log_err("u-> euc-jp with substitute did not match.\n");
1430
1431        if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1432                to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1433                UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1434            log_err("u-> euc-tw with substitute did not match.\n");
1435    }
1436#endif
1437
1438    log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1439    {
1440        UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1441
1442        const uint8_t to_SCSU[]={
1443            0x41,
1444            0x0e, 0xff,0xfd,
1445            0x42
1446
1447
1448        };
1449        int32_t from_SCSUOffs [] ={
1450            0,
1451            1,1,1,
1452            2,
1453
1454        };
1455        const uint8_t to_SCSU_1[]={
1456            0x41,
1457
1458        };
1459        int32_t from_SCSUOffs_1 [] ={
1460            0,
1461
1462        };
1463        if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1464                to_SCSU, sizeof(to_SCSU), "SCSU",
1465                UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1466            log_err("u-> SCSU with substitute did not match.\n");
1467
1468        if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1469                to_SCSU_1, sizeof(to_SCSU_1), "SCSU",
1470                UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1471            log_err("u-> SCSU with substitute did not match.\n");
1472    }
1473
1474    log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1475    {
1476        static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1477        static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1478                           0xf0, 0x90, 0x90, 0x81,
1479                           0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1480                           0xef, 0xbf, 0xbf, 0x61,
1481
1482        };
1483        static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1484        if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]),
1485                expectedUTF8, sizeof(expectedUTF8), "utf8",
1486                UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1487            log_err("u-> utf8 with stop did not match.\n");
1488        }
1489    }
1490
1491    log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1492    {
1493        static const UChar in[]={ 0x0041, 0xfeff };
1494
1495        static const uint8_t out[]={
1496#if U_IS_BIG_ENDIAN
1497            0xfe, 0xff,
1498            0x00, 0x41,
1499            0xfe, 0xff
1500#else
1501            0xff, 0xfe,
1502            0x41, 0x00,
1503            0xff, 0xfe
1504#endif
1505        };
1506        static const int32_t offsets[]={
1507            -1, -1, 0, 0, 1, 1
1508        };
1509
1510        if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1511                                   out, sizeof(out), "UTF-16",
1512                                   UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1513        ) {
1514            log_err("u->UTF-16 with substitute did not match.\n");
1515        }
1516    }
1517
1518    log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1519    {
1520        static const UChar in[]={ 0x0041, 0xfeff };
1521
1522        static const uint8_t out[]={
1523#if U_IS_BIG_ENDIAN
1524            0x00, 0x00, 0xfe, 0xff,
1525            0x00, 0x00, 0x00, 0x41,
1526            0x00, 0x00, 0xfe, 0xff
1527#else
1528            0xff, 0xfe, 0x00, 0x00,
1529            0x41, 0x00, 0x00, 0x00,
1530            0xff, 0xfe, 0x00, 0x00
1531#endif
1532        };
1533        static const int32_t offsets[]={
1534            -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1535        };
1536
1537        if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1538                                   out, sizeof(out), "UTF-32",
1539                                   UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1540        ) {
1541            log_err("u->UTF-32 with substitute did not match.\n");
1542        }
1543    }
1544
1545    /*to unicode*/
1546
1547#if !UCONFIG_NO_LEGACY_CONVERSION
1548    if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949),
1549             IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949",
1550            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1551        log_err("ibm-949->u with substitute did not match.\n");
1552    if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943),
1553             IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943",
1554            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1555        log_err("ibm-943->u with substitute did not match.\n");
1556    if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930),
1557             IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930",
1558            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1559        log_err("ibm-930->u with substitute did not match.\n");
1560
1561    log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1562    {
1563
1564        const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1565            0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1566        };
1567        UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0xfffd, 0x03b4
1568        };
1569        int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1570
1571
1572        /* EUC_JP*/
1573        const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1574            0x8f, 0xda, 0xa1,  /*unassigned*/
1575           0x8e, 0xe0, 0x8a
1576        };
1577        UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1578        int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6,  9, 11 };
1579
1580        /*EUC_TW*/
1581        const uint8_t sampleTxt_euc_tw[]={
1582            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1583            0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1584            0xe6, 0xca, 0x8a,
1585        };
1586        UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1587        int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1588
1589
1590        if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1591           EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1592          UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1593            log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1594
1595
1596        if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1597           euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
1598          UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1599            log_err("euc-jp->u with substitute did not match.\n");
1600
1601
1602        if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1603           euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1604          UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1605            log_err("euc-tw->u with substitute  did not match.\n");
1606
1607
1608        if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1609           euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
1610          UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1611            log_err("euc-jp->u with substitute did not match.\n");
1612    }
1613#endif
1614
1615    log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1616    {
1617        const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1618            0xe0, 0x80,  0x61,};
1619        UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0x0061};
1620        int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0006};
1621
1622        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1623                 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1624                UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1625            log_err("utf8->u with substitute did not match.\n");;
1626    }
1627    log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1628    {
1629        const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1630        UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffd,0xfffd};
1631        int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
1632
1633        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1634                 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1635                UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1636            log_err("scsu->u with stop did not match.\n");;
1637    }
1638
1639#if !UCONFIG_NO_LEGACY_CONVERSION
1640    log_verbose("Testing ibm-930 subchar/subchar1\n");
1641    {
1642        static const UChar u1[]={         0x6d63,           0x6d64,     0x6d65,     0x6d66,     0xdf };
1643        static const uint8_t s1[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1644        static const int32_t offsets1[]={ 0,    0,    0,    1,    1,    2,    2,    3,    3,    4,    4 };
1645
1646        static const UChar u2[]={         0x6d63,           0x6d64,     0xfffd,     0x6d66,     0x1a };
1647        static const uint8_t s2[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1648        static const int32_t offsets2[]={ 1,                3,          5,          7,          10 };
1649
1650        if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930",
1651                                   UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1652        ) {
1653            log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1654        }
1655
1656        if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930",
1657                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1658        ) {
1659            log_err("ibm-930->u subchar/subchar1 did not match.\n");
1660        }
1661    }
1662
1663    log_verbose("Testing GB 18030 with substitute callbacks\n");
1664    {
1665        static const UChar u2[]={
1666            0x24, 0x7f, 0x80,                   0x1f9,      0x20ac,     0x4e00,     0x9fa6,                 0xffff,                 0xd800, 0xdc00,         0xfffd,                 0xdbff, 0xdfff };
1667        static const uint8_t gb2[]={
1668            0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1669        static const int32_t offsets2[]={
1670            0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1671
1672        if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030",
1673                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1674        ) {
1675            log_err("gb18030->u with substitute did not match.\n");
1676        }
1677    }
1678#endif
1679
1680    log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1681    {
1682        static const uint8_t utf7[]={
1683         /* a~            a+AB~                           a+AB\x0c                        a+AB-                         a+AB.                         a+. */
1684            0x61, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x0c,   0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b,   0x2e
1685        };
1686        static const UChar unicode[]={
1687            0x61, 0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,           0x61,       0xfffd,     0x2e, 0x61, 0xfffd, 0x2e
1688        };
1689        static const int32_t offsets[]={
1690            0,    1,      2,          4,          6,      7,          9,          11,     12,         14,               17,         19,         21,   22,   23,     24
1691        };
1692
1693        if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7",
1694                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1695        ) {
1696            log_err("UTF-7->u with substitute did not match.\n");
1697        }
1698    }
1699
1700    log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1701    {
1702        static const uint8_t
1703            in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1704            in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1705            in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1706
1707        static const UChar
1708            out1[]={ 0x4e00, 0xfeff },
1709            out2[]={ 0x004e, 0xfffe },
1710            out3[]={ 0xfefd, 0x4e00, 0xfeff };
1711
1712        static const int32_t
1713            offsets1[]={ 2, 4 },
1714            offsets2[]={ 2, 4 },
1715            offsets3[]={ 0, 2, 4 };
1716
1717        if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16",
1718                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1719        ) {
1720            log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1721        }
1722
1723        if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16",
1724                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1725        ) {
1726            log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1727        }
1728
1729        if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16",
1730                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1731        ) {
1732            log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1733        }
1734    }
1735
1736    log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1737    {
1738        static const uint8_t
1739            in1[]={ 0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff },
1740            in2[]={ 0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00 },
1741            in3[]={ 0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01 },
1742            in4[]={ 0x00, 0x01, 0x02, 0x03,   0x00, 0x11, 0x12, 0x00,   0x00, 0x00, 0x4e, 0x00 };
1743
1744        static const UChar
1745            out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1746            out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1747            out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1748            out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1749
1750        static const int32_t
1751            offsets1[]={ 4, 4, 8 },
1752            offsets2[]={ 4, 4, 8 },
1753            offsets3[]={ 0, 4, 4, 8, 12 },
1754            offsets4[]={ 0, 0, 4, 8 };
1755
1756        if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32",
1757                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1758        ) {
1759            log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1760        }
1761
1762        if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32",
1763                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1764        ) {
1765            log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1766        }
1767
1768        if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32",
1769                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1770        ) {
1771            log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1772        }
1773
1774        if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32",
1775                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1776        ) {
1777            log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1778        }
1779    }
1780}
1781
1782static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1783{
1784    UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1785    UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1786
1787    const uint8_t expsubwvalIBM_949[]= {
1788        0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1789        0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1790
1791    const uint8_t expsubwvalIBM_943[]= {
1792        0x9f, 0xaf, 0x9f, 0xb1,
1793        0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1794
1795    const uint8_t expsubwvalIBM_930[] = {
1796        0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1797
1798    int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1799    int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1800    int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1801
1802    gInBufferSize = inputsize;
1803    gOutBufferSize = outputsize;
1804
1805    /*from Unicode*/
1806
1807#if !UCONFIG_NO_LEGACY_CONVERSION
1808    if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1809            expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949",
1810            UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1811        log_err("u-> ibm-949 with subst with value did not match.\n");
1812
1813    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1814            expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943",
1815            UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1816        log_err("u-> ibm-943 with sub with value did not match.\n");
1817
1818    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1819            expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930",
1820            UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1821        log_err("u-> ibm-930 with subst with value did not match.\n");
1822
1823
1824    log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
1825    {
1826        static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1827        static const uint8_t toIBM943[]= { 0x61,
1828            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1829            0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1830            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1831            0x61 };
1832        static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1833
1834
1835         /* EUC_JP*/
1836        static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1837        static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1838            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1839            0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1840            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1841            0x61, 0x8e, 0xe0,
1842        };
1843        static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1844            3, 3, 3, 3, 3, 3,
1845            3, 3, 3, 3, 3, 3,
1846            5, 5, 5, 5, 5, 5,
1847            6, 7, 7,
1848        };
1849
1850        /*EUC_TW*/
1851        static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1852        static const uint8_t to_euc_tw[]={
1853            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1854            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1855            0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1856            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1857            0x61, 0xe6, 0xca, 0x8a,
1858        };
1859        static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1860             3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1861             6, 7, 7, 8,
1862        };
1863        /*ISO-2022-JP*/
1864        static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1865        static const uint8_t to_iso_2022_jp1[]={
1866            0x1b,   0x24,   0x42,   0x21, 0x21,
1867            0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1868            0x1b,   0x24,   0x42,   0x21, 0x22,
1869            0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1870            0x42,
1871        };
1872
1873        static const int32_t from_iso_2022_jpOffs1 [] ={
1874            0,0,0,0,0,
1875            1,1,1,1,1,1,1,1,1,
1876            2,2,2,2,2,
1877            3,3,3,3,3,3,3,3,3,
1878            4,
1879        };
1880        /* surrogate pair*/
1881        static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1882        static const uint8_t to_iso_2022_jp2[]={
1883                                0x1b,   0x24,   0x42,   0x21,   0x21,
1884                                0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1885                                0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1886                                0x1b,   0x24,   0x42,   0x21,   0x22,
1887                                0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1888                                0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1889                                0x42,
1890                                };
1891        static const int32_t from_iso_2022_jpOffs2 [] ={
1892            0,0,0,0,0,
1893            1,1,1,1,1,1,1,1,1,
1894            1,1,1,1,1,1,
1895            3,3,3,3,3,
1896            4,4,4,4,4,4,4,4,4,
1897            4,4,4,4,4,4,
1898            6,
1899        };
1900
1901        /*ISO-2022-cn*/
1902        static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1903        static const uint8_t to_iso_2022_cn[]={
1904            0x41,
1905            0x25, 0x55,   0x33,   0x37,   0x31,   0x32,
1906            0x42,
1907        };
1908        static const int32_t from_iso_2022_cnOffs [] ={
1909            0,
1910            1,1,1,1,1,1,
1911            2,
1912        };
1913
1914        static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1915
1916        static const uint8_t to_iso_2022_cn4[]={
1917                             0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
1918                             0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1919                             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1920                             0x0e,   0x21,   0x22,
1921                             0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1922                             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1923                             0x42,
1924                             };
1925        static const int32_t from_iso_2022_cnOffs4 [] ={
1926            0,0,0,0,0,0,0,
1927            1,1,1,1,1,1,1,
1928            1,1,1,1,1,1,
1929            3,3,3,
1930            4,4,4,4,4,4,4,
1931            4,4,4,4,4,4,
1932            6
1933
1934        };
1935
1936        /*ISO-2022-kr*/
1937        static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1938        static const uint8_t to_iso_2022_kr2[]={
1939            0x1b,   0x24,   0x29,   0x43,
1940            0x41,
1941            0x0e,   0x25,   0x50,
1942            0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1943            0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1944            0x0e,   0x25,   0x50,
1945            0x0f,   0x42,
1946            0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1947            0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1948            0x43
1949        };
1950        static const int32_t from_iso_2022_krOffs2 [] ={
1951            -1,-1,-1,-1,
1952             0,
1953            1,1,1,
1954            2,2,2,2,2,2,2,
1955            2,2,2,2,2,2,
1956            4,4,4,
1957            5,5,
1958            6,6,6,6,6,6,
1959            6,6,6,6,6,6,
1960            8,
1961        };
1962
1963        static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1964        static const uint8_t to_iso_2022_kr[]={
1965            0x1b,   0x24,   0x29,   0x43,
1966            0x41,
1967            0x0e,   0x25,   0x50,
1968            0x0f,   0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1969            0x0e,   0x25,   0x50,
1970            0x0f,   0x42,
1971            0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1972            0x43
1973        };
1974
1975
1976        static const int32_t from_iso_2022_krOffs [] ={
1977            -1,-1,-1,-1,
1978             0,
1979            1,1,1,
1980            2,2,2,2,2,2,2,
1981            3,3,3,
1982            4,4,
1983            5,5,5,5,5,5,
1984            6,
1985        };
1986        /* HZ encoding */
1987        static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1988
1989        static const uint8_t to_hz[]={
1990            0x7e,   0x7d,   0x41,
1991            0x7e,   0x7b,   0x26,   0x30,
1992            0x7e,   0x7d,   0x25,   0x55,   0x30,   0x36,   0x36,   0x32,  /*unassigned*/
1993            0x7e,   0x7b,   0x26,   0x30,
1994            0x7e,   0x7d,   0x42,
1995
1996        };
1997        static const int32_t from_hzOffs [] ={
1998            0,0,0,
1999            1,1,1,1,
2000            2,2,2,2,2,2,2,2,
2001            3,3,3,3,
2002            4,4,4
2003        };
2004
2005        static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2006        static const uint8_t to_hz2[]={
2007            0x7e,   0x7d,   0x41,
2008            0x7e,   0x7b,   0x26,   0x30,
2009            0x7e,   0x7d,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2010            0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2011            0x7e,   0x7b,   0x26,   0x30,
2012            0x7e,   0x7d,   0x42,
2013            0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2014            0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2015            0x43
2016        };
2017        static const int32_t from_hzOffs2 [] ={
2018            0,0,0,
2019            1,1,1,1,
2020            2,2,2,2,2,2,2,2,
2021            2,2,2,2,2,2,
2022            4,4,4,4,
2023            5,5,5,
2024            6,6,6,6,6,6,
2025            6,6,6,6,6,6,
2026            8,
2027        };
2028
2029                /*ISCII*/
2030        static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2031        static const uint8_t to_iscii[]={
2032            0x41,
2033            0xef,   0x42,   0xa1,
2034            0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2035            0xa2,
2036            0x42,
2037            0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2038            0x43
2039        };
2040
2041
2042        static const int32_t from_isciiOffs [] ={
2043            0,
2044            1,1,1,
2045            2,2,2,2,2,2,
2046            3,
2047            4,
2048            5,5,5,5,5,5,
2049            6,
2050        };
2051
2052        if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
2053                toIBM943, sizeof(toIBM943), "ibm-943",
2054                UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2055            log_err("u-> ibm-943 with subst with value did not match.\n");
2056
2057        if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
2058                to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
2059                UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2060            log_err("u-> euc-jp with subst with value did not match.\n");
2061
2062        if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
2063                to_euc_tw, sizeof(to_euc_tw), "euc-tw",
2064                UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2065            log_err("u-> euc-tw with subst with value did not match.\n");
2066
2067        if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2068                to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2069                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2070            log_err("u-> iso_2022_jp with subst with value did not match.\n");
2071
2072        if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2073                to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2074                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2075            log_err("u-> iso_2022_jp with subst with value did not match.\n");
2076
2077        if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
2078                to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
2079                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2080            log_err("u-> iso_2022_jp with subst with value did not match.\n");
2081        /*ESCAPE OPTIONS*/
2082        {
2083            /* surrogate pair*/
2084            static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2085            static const uint8_t to_iso_2022_jp3_v2[]={
2086                    0x1b,   0x24,   0x42,   0x21,   0x21,
2087                    0x1b,   0x28,   0x42,   0x26,   0x23,   0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2088
2089                    0x1b,   0x24,   0x42,   0x21,   0x22,
2090                    0x1b,   0x28,   0x42,   0x26,   0x23,  0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2091
2092                    0x42,
2093                    0x26,   0x23,   0x33,   0x36,   0x38,   0x39,   0x32,   0x3b,
2094                    };
2095
2096            static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2097                0,0,0,0,0,
2098                1,1,1,1,1,1,1,1,1,1,1,1,
2099
2100                3,3,3,3,3,
2101                4,4,4,4,4,4,4,4,4,4,4,4,
2102
2103                6,
2104                7,7,7,7,7,7,7,7,7
2105            };
2106
2107            if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]),
2108                    to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp",
2109                    UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2110                log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2111        }
2112        {
2113            static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2114            static const uint8_t to_iso_2022_cn5_v2[]={
2115                             0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2116                             0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2117                             0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2118                             0x0e,   0x21,   0x22,
2119                             0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2120                             0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2121                             0x42,
2122                             0x5c,   0x75,   0x30,   0x39,   0x30,   0x32,
2123                             };
2124            static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2125                0,0,0,0,0,0,0,
2126                1,1,1,1,1,1,1,
2127                1,1,1,1,1,1,
2128                3,3,3,
2129                4,4,4,4,4,4,4,
2130                4,4,4,4,4,4,
2131                6,
2132                7,7,7,7,7,7
2133            };
2134            if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]),
2135                to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn",
2136                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2137                log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2138
2139        }
2140        {
2141            static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2142            static const uint8_t to_iso_2022_cn6_v2[]={
2143                                0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2144                                0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2145                                0x0e,   0x21,   0x22,
2146                                0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2147                                0x42,
2148                                0x7b,   0x55,   0x2b,   0x30,   0x39,   0x30,   0x32,   0x7d
2149                             };
2150            static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2151                    0,  0,  0,  0,  0,  0,  0,
2152                    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2153                    3,  3,  3,
2154                    4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2155                    6,
2156                    7,  7,  7,  7,  7,  7,  7,  7,
2157            };
2158            if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]),
2159                to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn",
2160                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2161                log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2162
2163        }
2164        {
2165            static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2166            static const uint8_t to_iso_2022_cn7_v2[]={
2167                                0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2168                                0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2169                                0x0e,   0x21,   0x22,
2170                                0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2171                                0x42,   0x25,   0x55,   0x30,   0x39,   0x30,   0x32,
2172                            };
2173            static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2174                                0,  0,  0,  0,  0,  0,  0,
2175                                1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2176                                3,  3,  3,
2177                                4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2178                                6,
2179                                7,  7,  7,  7,  7,  7,
2180            };
2181            if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]),
2182                to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn",
2183                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2184                log_err("u-> iso-2022-cn with sub & K did not match.\n");
2185
2186        }
2187        {
2188            static const UChar iso_2022_cn_inputText8[]={
2189                                0x3000,
2190                                0xD84D, 0xDC56,
2191                                0x3001,
2192                                0xD84D, 0xDC56,
2193                                0xDBFF, 0xDFFF,
2194                                0x0042,
2195                                0x0902};
2196            static const uint8_t to_iso_2022_cn8_v2[]={
2197                                0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2198                                0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2199                                0x0e,   0x21,   0x22,
2200                                0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2201                                0x5c,   0x31,   0x30,   0x46,   0x46,   0x46,   0x46,   0x20,
2202                                0x42,
2203                                0x5c,   0x39,   0x30,   0x32,   0x20
2204                             };
2205            static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2206                    0,  0,  0,  0,  0,  0,  0,
2207                    1,  1,  1,  1,  1,  1,  1,  1,
2208                    3,  3,  3,
2209                    4,  4,  4,  4,  4,  4,  4,  4,
2210                    6,  6,  6,  6,  6,  6,  6,  6,
2211                    8,
2212                    9,  9,  9,  9,  9
2213            };
2214            if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]),
2215                to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn",
2216                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2217                log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2218
2219        }
2220        {
2221            static const uint8_t to_iso_2022_cn4_v3[]={
2222                            0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2223                            0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2224                            0x0e,   0x21,   0x22,
2225                            0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2226                            0x42
2227                             };
2228
2229
2230            static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2231                0,0,0,0,0,0,0,
2232                1,1,1,1,1,1,1,1,1,1,1,
2233
2234                3,3,3,
2235                4,4,4,4,4,4,4,4,4,4,4,
2236
2237                6
2238
2239            };
2240            if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2241                to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn",
2242                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2243            {
2244                log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2245            }
2246        }
2247        if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
2248                to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
2249                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2250            log_err("u-> iso_2022_cn with subst with value did not match.\n");
2251
2252        if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2253                to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn",
2254                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2255            log_err("u-> iso_2022_cn with subst with value did not match.\n");
2256        if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
2257                to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
2258                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2259            log_err("u-> iso_2022_kr with subst with value did not match.\n");
2260        if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]),
2261                to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr",
2262                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2263            log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2264        if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
2265                to_hz, sizeof(to_hz), "HZ",
2266                UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2267            log_err("u-> hz with subst with value did not match.\n");
2268        if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]),
2269                to_hz2, sizeof(to_hz2), "HZ",
2270                UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2271            log_err("u-> hz with subst with value did not match.\n");
2272
2273        if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
2274                to_iscii, sizeof(to_iscii), "ISCII,version=0",
2275                UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2276            log_err("u-> iscii with subst with value did not match.\n");
2277    }
2278#endif
2279
2280    log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2281    /*to Unicode*/
2282    {
2283#if !UCONFIG_NO_LEGACY_CONVERSION
2284        static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2285            0x81, 0xad, /*unassigned*/
2286            0x89, 0xd3 };
2287        static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2288            0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2289            0x7B87};
2290        static const int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2291
2292        /* EUC_JP*/
2293        static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2294            0x8f, 0xda, 0xa1,  /*unassigned*/
2295           0x8e, 0xe0,
2296        };
2297        static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2298            0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2299            0x00a2 };
2300        static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2301            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2302            9,
2303        };
2304
2305        /*EUC_TW*/
2306        static const uint8_t sampleTxt_euc_tw[]={
2307            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2308            0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2309            0xe6, 0xca, 0x8a,
2310        };
2311        static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2312             0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2313             0x8706, 0x8a, };
2314        static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2315             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2316             11, 13};
2317
2318        /*iso-2022-jp*/
2319        static const uint8_t sampleTxt_iso_2022_jp[]={
2320            0x1b,   0x28,   0x42,   0x41,
2321            0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
2322            0x1b,   0x28,   0x42,   0x42,
2323
2324        };
2325                                                   /*     A    %    X    3    A    %    X    1    A     B    */
2326        static const UChar iso_2022_jptoUnicode[]={    0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2327        static const int32_t from_iso_2022_jpOffs [] ={  3,   7,   7,   7,   7,   7,   7,   7,   7,    12   };
2328
2329        /*iso-2022-cn*/
2330        static const uint8_t sampleTxt_iso_2022_cn[]={
2331            0x0f,   0x41,   0x44,
2332            0x1B,   0x24,   0x29,   0x47,
2333            0x0E,   0x40,   0x6c, /*unassigned*/
2334            0x0f,   0x42,
2335
2336        };
2337        static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2338        static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   8,   8,   8,   8,   8,   8,   8,  8,    11   };
2339
2340        /*iso-2022-kr*/
2341        static const uint8_t sampleTxt_iso_2022_kr[]={
2342          0x1b, 0x24, 0x29,  0x43,
2343          0x41,
2344          0x0E, 0x7f, 0x1E,
2345          0x0e, 0x25, 0x50,
2346          0x0f, 0x51,
2347          0x42, 0x43,
2348
2349        };
2350        static const UChar iso_2022_krtoUnicode[]={     0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2351        static const int32_t from_iso_2022_krOffs [] ={  4,   6,   6,   6,   6,   6,   6,   6,   6,    9,    12,   13  , 14 };
2352
2353        /*hz*/
2354        static const uint8_t sampleTxt_hz[]={
2355            0x41,
2356            0x7e,   0x7b,   0x26,   0x30,
2357            0x7f,   0x1E, /*unassigned*/
2358            0x26,   0x30,
2359            0x7e,   0x7d,   0x42,
2360            0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
2361            0x7e,   0x7d,   0x42,
2362        };
2363        static const UChar hztoUnicode[]={
2364            0x41,
2365            0x03a0,
2366            0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2367            0x03A0,
2368            0x42,
2369            0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2370            0x42,};
2371
2372        static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18,  };
2373
2374
2375        /*iscii*/
2376        static const uint8_t sampleTxt_iscii[]={
2377            0x41,
2378            0x30,
2379            0xEB, /*unassigned*/
2380            0xa3,
2381            0x42,
2382            0xEC, /*unassigned*/
2383            0x42,
2384        };
2385        static const UChar isciitoUnicode[]={
2386            0x41,
2387            0x30,
2388            0x25,  0x58,  0x45, 0x42,
2389            0x0903,
2390            0x42,
2391            0x25,  0x58,  0x45, 0x43,
2392            0x42,};
2393
2394        static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6  };
2395#endif
2396
2397        /*UTF8*/
2398        static const uint8_t sampleTxtUTF8[]={
2399            0x20, 0x64, 0x50,
2400            0xC2, 0x7E, /* truncated char */
2401            0x20,
2402            0xE0, 0xB5, 0x7E, /* truncated char */
2403            0x40,
2404        };
2405        static const UChar UTF8ToUnicode[]={
2406            0x0020, 0x0064, 0x0050,
2407            0x0025, 0x0058, 0x0043, 0x0032, 0x007E,  /* \xC2~ */
2408            0x0020,
2409            0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2410            0x0040
2411        };
2412        static const int32_t fromUTF8[] = {
2413            0, 1, 2,
2414            3, 3, 3, 3, 4,
2415            5,
2416            6, 6, 6, 6, 6, 6, 6, 6, 8,
2417            9
2418        };
2419        static const UChar UTF8ToUnicodeXML_DEC[]={
2420            0x0020, 0x0064, 0x0050,
2421            0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E,  /* &#194;~ */
2422            0x0020,
2423            0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2424            0x0040
2425        };
2426        static const int32_t fromUTF8XML_DEC[] = {
2427            0, 1, 2,
2428            3, 3, 3, 3, 3, 3, 4,
2429            5,
2430            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2431            9
2432        };
2433
2434
2435#if !UCONFIG_NO_LEGACY_CONVERSION
2436        if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU),
2437                 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
2438                UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2439            log_err("ibm-943->u with substitute with value did not match.\n");
2440
2441        if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP),
2442                 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"IBM-eucJP",
2443                UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2444            log_err("euc-jp->u with substitute with value did not match.\n");
2445
2446        if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
2447                 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
2448                UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2449            log_err("euc-tw->u with substitute with value did not match.\n");
2450
2451        if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2452                 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2453                UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2454            log_err("iso-2022-jp->u with substitute with value did not match.\n");
2455
2456        if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2457                 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2458                UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2459            log_err("iso-2022-jp->u with substitute with value did not match.\n");
2460
2461        {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2462            {
2463                static const UChar iso_2022_jptoUnicodeDec[]={
2464                                                  0x0041,
2465                                                  /*   &         #         5         8         ;   */
2466                                                  0x0026,   0x0023,   0x0035,   0x0038,   0x003b,
2467                                                  0x0026,   0x0023,   0x0032,   0x0036,   0x003b,
2468                                                  0x0042 };
2469                static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12,  };
2470                if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2471                     iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2472                    UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2473                log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2474            }
2475            {
2476                static const UChar iso_2022_jptoUnicodeHex[]={
2477                                                  0x0041,
2478                                                  /*   &       #       x       3       A       ;  */
2479                                                  0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2480                                                  0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2481                                                  0x0042 };
2482                static const int32_t from_iso_2022_jpOffsHex [] ={  3,7,7,7,7,7,7,7,7,7,7,7,7,12   };
2483                if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2484                     iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2485                    UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2486                log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2487            }
2488            {
2489                static const UChar iso_2022_jptoUnicodeC[]={
2490                                                0x0041,
2491                                                0x005C, 0x0078, 0x0033, 0x0041,   /*  \x3A */
2492                                                0x005C, 0x0078, 0x0031, 0x0041,   /*  \x1A */
2493                                                0x0042 };
2494                int32_t from_iso_2022_jpOffsC [] ={  3,7,7,7,7,7,7,7,7,12   };
2495                if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2496                     iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2497                    UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2498                log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2499            }
2500        }
2501        if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
2502                 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
2503                UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2504            log_err("iso-2022-cn->u with substitute with value did not match.\n");
2505
2506        if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
2507                 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
2508                UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2509            log_err("iso-2022-kr->u with substitute with value did not match.\n");
2510
2511         if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
2512                 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
2513                UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2514            log_err("hz->u with substitute with value did not match.\n");
2515
2516         if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
2517                 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
2518                UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2519            log_err("ISCII ->u with substitute with value did not match.\n");
2520#endif
2521
2522        if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2523                UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8",
2524                UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2525            log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2526        if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2527                UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8",
2528                UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2529            log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2530    }
2531}
2532
2533#if !UCONFIG_NO_LEGACY_CONVERSION
2534static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2535{
2536    static const UChar    legalText[] =  { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2537    static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2538    static const int32_t  to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2539
2540
2541    static const uint8_t text943[] = {
2542        0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2543    static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,  0x5b57 };
2544    static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22,  0x5b57 };
2545    static const UChar toUnicode943stop[]= { 0x304b};
2546
2547    static const int32_t  fromIBM943Offssub[]  = { 0, 2, 3, 4, 5, 7 };
2548    static const int32_t  fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2549    static const int32_t  fromIBM943Offsstop[] = { 0};
2550
2551    gInBufferSize = inputsize;
2552    gOutBufferSize = outputsize;
2553    /*checking with a legal value*/
2554    if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]),
2555            templegal949, sizeof(templegal949), "ibm-949",
2556            UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2557        log_err("u-> ibm-949 with skip did not match.\n");
2558
2559    /*checking illegal value for ibm-943 with substitute*/
2560    if(!testConvertToUnicode(text943, sizeof(text943),
2561             toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2562            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2563        log_err("ibm-943->u with subst did not match.\n");
2564    /*checking illegal value for ibm-943 with skip */
2565    if(!testConvertToUnicode(text943, sizeof(text943),
2566             toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943",
2567            UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2568        log_err("ibm-943->u with skip did not match.\n");
2569
2570    /*checking illegal value for ibm-943 with stop */
2571    if(!testConvertToUnicode(text943, sizeof(text943),
2572             toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943",
2573            UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2574        log_err("ibm-943->u with stop did not match.\n");
2575
2576}
2577
2578static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2579{
2580    static const uint8_t sampleText[] = {
2581        0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2582        0xff, 0x32, 0x33};
2583    static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2584    static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2585    /*checking illegal value for ibm-943 with substitute*/
2586    gInBufferSize = inputsize;
2587    gOutBufferSize = outputsize;
2588
2589    if(!testConvertToUnicode(sampleText, sizeof(sampleText),
2590             toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2591            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2592        log_err("ibm-943->u with subst did not match.\n");
2593}
2594
2595static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2596{
2597    /*EBCDIC_STATEFUL*/
2598    static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2599    static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2600    static const int32_t offset_930[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    4,    4,    5,    5    };
2601/*                              s     SO    doubl       SI    sng   s     SO    fe    fe    SI    s    */
2602
2603    /*EBCDIC_STATEFUL with subChar=3f*/
2604    static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2605    static const int32_t offset_930_subvaried[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    5    };
2606    static const char mySubChar[]={ 0x3f};
2607
2608    gInBufferSize = inputsize;
2609    gOutBufferSize = outputsize;
2610
2611    if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2612        toIBM930, sizeof(toIBM930), "ibm-930",
2613        UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2614            log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2615
2616    if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2617        toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930",
2618        UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2619            log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2620}
2621#endif
2622
2623UBool testConvertFromUnicode(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
2624                const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2625                const char *mySubChar, int8_t len)
2626{
2627
2628
2629    UErrorCode status = U_ZERO_ERROR;
2630    UConverter *conv = 0;
2631    char junkout[NEW_MAX_BUFFER]; /* FIX */
2632    int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2633    const UChar *src;
2634    char *end;
2635    char *targ;
2636    int32_t *offs;
2637    int i;
2638    int32_t  realBufferSize;
2639    char *realBufferEnd;
2640    const UChar *realSourceEnd;
2641    const UChar *sourceLimit;
2642    UBool checkOffsets = TRUE;
2643    UBool doFlush;
2644    char junk[9999];
2645    char offset_str[9999];
2646    char *p;
2647    UConverterFromUCallback oldAction = NULL;
2648    const void* oldContext = NULL;
2649
2650
2651    for(i=0;i<NEW_MAX_BUFFER;i++)
2652        junkout[i] = (char)0xF0;
2653    for(i=0;i<NEW_MAX_BUFFER;i++)
2654        junokout[i] = 0xFF;
2655    setNuConvTestName(codepage, "FROM");
2656
2657    log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
2658            gOutBufferSize);
2659
2660    conv = ucnv_open(codepage, &status);
2661    if(U_FAILURE(status))
2662    {
2663        log_data_err("Couldn't open converter %s\n",codepage);
2664        return TRUE;
2665    }
2666
2667    log_verbose("Converter opened..\n");
2668
2669    /*----setting the callback routine----*/
2670    ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2671    if (U_FAILURE(status))
2672    {
2673        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2674    }
2675    /*------------------------*/
2676    /*setting the subChar*/
2677    if(mySubChar != NULL){
2678        ucnv_setSubstChars(conv, mySubChar, len, &status);
2679        if (U_FAILURE(status))  {
2680            log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2681        }
2682    }
2683    /*------------*/
2684
2685    src = source;
2686    targ = junkout;
2687    offs = junokout;
2688
2689    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2690    realBufferEnd = junkout + realBufferSize;
2691    realSourceEnd = source + sourceLen;
2692
2693    if ( gOutBufferSize != realBufferSize )
2694      checkOffsets = FALSE;
2695
2696    if( gInBufferSize != NEW_MAX_BUFFER )
2697      checkOffsets = FALSE;
2698
2699    do
2700    {
2701        end = nct_min(targ + gOutBufferSize, realBufferEnd);
2702        sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2703
2704        doFlush = (UBool)(sourceLimit == realSourceEnd);
2705
2706        if(targ == realBufferEnd)
2707        {
2708            log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2709            return FALSE;
2710        }
2711        log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2712
2713
2714        status = U_ZERO_ERROR;
2715
2716        ucnv_fromUnicode (conv,
2717                  (char **)&targ,
2718                  (const char *)end,
2719                  &src,
2720                  sourceLimit,
2721                  checkOffsets ? offs : NULL,
2722                  doFlush, /* flush if we're at the end of the input data */
2723                  &status);
2724    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2725
2726
2727    if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2728        UChar errChars[50]; /* should be sufficient */
2729        int8_t errLen = 50;
2730        UErrorCode err = U_ZERO_ERROR;
2731        const UChar* start= NULL;
2732        ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2733        if(U_FAILURE(err)){
2734            log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2735        }
2736        /* length of in invalid chars should be equal to returned length*/
2737        start = src - errLen;
2738        if(u_strncmp(errChars,start,errLen)!=0){
2739            log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2740        }
2741    }
2742    /* allow failure codes for the stop callback */
2743    if(U_FAILURE(status) &&
2744       (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2745    {
2746        log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2747        return FALSE;
2748    }
2749
2750    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2751        sourceLen, targ-junkout);
2752    if(getTestOption(VERBOSITY_OPTION))
2753    {
2754
2755        junk[0] = 0;
2756        offset_str[0] = 0;
2757        for(p = junkout;p<targ;p++)
2758        {
2759            sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2760            sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2761        }
2762
2763        log_verbose(junk);
2764        printSeq(expect, expectLen);
2765        if ( checkOffsets )
2766        {
2767            log_verbose("\nOffsets:");
2768            log_verbose(offset_str);
2769        }
2770        log_verbose("\n");
2771    }
2772    ucnv_close(conv);
2773
2774
2775    if(expectLen != targ-junkout)
2776    {
2777        log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2778        log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2779        printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2780        printSeqErr(expect, expectLen);
2781        return FALSE;
2782    }
2783
2784    if (checkOffsets && (expectOffsets != 0) )
2785    {
2786        log_verbose("comparing %d offsets..\n", targ-junkout);
2787        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2788            log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2789            log_err("Got Output : ");
2790            printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2791            log_err("Got Offsets:      ");
2792            for(p=junkout;p<targ;p++)
2793                log_err("%d,", junokout[p-junkout]);
2794            log_err("\n");
2795            log_err("Expected Offsets: ");
2796            for(i=0; i<(targ-junkout); i++)
2797                log_err("%d,", expectOffsets[i]);
2798            log_err("\n");
2799            return FALSE;
2800        }
2801    }
2802
2803    if(!memcmp(junkout, expect, expectLen))
2804    {
2805        log_verbose("String matches! %s\n", gNuConvTestName);
2806        return TRUE;
2807    }
2808    else
2809    {
2810        log_err("String does not match. %s\n", gNuConvTestName);
2811        log_err("source: ");
2812        printUSeqErr(source, sourceLen);
2813        log_err("Got:      ");
2814        printSeqErr((const uint8_t *)junkout, expectLen);
2815        log_err("Expected: ");
2816        printSeqErr(expect, expectLen);
2817        return FALSE;
2818    }
2819}
2820
2821UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2822               const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2823               const char *mySubChar, int8_t len)
2824{
2825    UErrorCode status = U_ZERO_ERROR;
2826    UConverter *conv = 0;
2827    UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
2828    int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2829    const char *src;
2830    const char *realSourceEnd;
2831    const char *srcLimit;
2832    UChar *targ;
2833    UChar *end;
2834    int32_t *offs;
2835    int i;
2836    UBool   checkOffsets = TRUE;
2837    char junk[9999];
2838    char offset_str[9999];
2839    UChar *p;
2840    UConverterToUCallback oldAction = NULL;
2841    const void* oldContext = NULL;
2842
2843    int32_t   realBufferSize;
2844    UChar *realBufferEnd;
2845
2846
2847    for(i=0;i<NEW_MAX_BUFFER;i++)
2848        junkout[i] = 0xFFFE;
2849
2850    for(i=0;i<NEW_MAX_BUFFER;i++)
2851        junokout[i] = -1;
2852
2853    setNuConvTestName(codepage, "TO");
2854
2855    log_verbose("\n=========  %s\n", gNuConvTestName);
2856
2857    conv = ucnv_open(codepage, &status);
2858    if(U_FAILURE(status))
2859    {
2860        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2861        return TRUE;
2862    }
2863
2864    log_verbose("Converter opened..\n");
2865
2866    src = (const char *)source;
2867    targ = junkout;
2868    offs = junokout;
2869
2870    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2871    realBufferEnd = junkout + realBufferSize;
2872    realSourceEnd = src + sourcelen;
2873    /*----setting the callback routine----*/
2874    ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2875    if (U_FAILURE(status))
2876    {
2877        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2878    }
2879    /*-------------------------------------*/
2880    /*setting the subChar*/
2881    if(mySubChar != NULL){
2882        ucnv_setSubstChars(conv, mySubChar, len, &status);
2883        if (U_FAILURE(status))  {
2884            log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2885        }
2886    }
2887    /*------------*/
2888
2889
2890    if ( gOutBufferSize != realBufferSize )
2891        checkOffsets = FALSE;
2892
2893    if( gInBufferSize != NEW_MAX_BUFFER )
2894        checkOffsets = FALSE;
2895
2896    do
2897    {
2898        end = nct_min( targ + gOutBufferSize, realBufferEnd);
2899        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2900
2901        if(targ == realBufferEnd)
2902        {
2903            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2904            return FALSE;
2905        }
2906        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2907
2908
2909
2910        status = U_ZERO_ERROR;
2911
2912        ucnv_toUnicode (conv,
2913                &targ,
2914                end,
2915                (const char **)&src,
2916                (const char *)srcLimit,
2917                checkOffsets ? offs : NULL,
2918                (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2919                &status);
2920    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2921
2922    if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2923        char errChars[50]; /* should be sufficient */
2924        int8_t errLen = 50;
2925        UErrorCode err = U_ZERO_ERROR;
2926        const char* start= NULL;
2927        ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2928        if(U_FAILURE(err)){
2929            log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2930        }
2931        /* length of in invalid chars should be equal to returned length*/
2932        start = src - errLen;
2933        if(uprv_strncmp(errChars,start,errLen)!=0){
2934            log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2935        }
2936    }
2937    /* allow failure codes for the stop callback */
2938    if(U_FAILURE(status) &&
2939       (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2940    {
2941        log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2942        return FALSE;
2943    }
2944
2945    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2946        sourcelen, targ-junkout);
2947    if(getTestOption(VERBOSITY_OPTION))
2948    {
2949
2950        junk[0] = 0;
2951        offset_str[0] = 0;
2952
2953        for(p = junkout;p<targ;p++)
2954        {
2955            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2956            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2957        }
2958
2959        log_verbose(junk);
2960        printUSeq(expect, expectlen);
2961        if ( checkOffsets )
2962        {
2963            log_verbose("\nOffsets:");
2964            log_verbose(offset_str);
2965        }
2966        log_verbose("\n");
2967    }
2968    ucnv_close(conv);
2969
2970    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2971
2972    if (checkOffsets && (expectOffsets != 0))
2973    {
2974        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2975        {
2976            log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2977            log_err("Got offsets:      ");
2978            for(p=junkout;p<targ;p++)
2979                log_err("  %2d,", junokout[p-junkout]);
2980            log_err("\n");
2981            log_err("Expected offsets: ");
2982            for(i=0; i<(targ-junkout); i++)
2983                log_err("  %2d,", expectOffsets[i]);
2984            log_err("\n");
2985            log_err("Got output:       ");
2986            for(i=0; i<(targ-junkout); i++)
2987                log_err("0x%04x,", junkout[i]);
2988            log_err("\n");
2989            log_err("From source:      ");
2990            for(i=0; i<(src-(const char *)source); i++)
2991                log_err("  0x%02x,", (unsigned char)source[i]);
2992            log_err("\n");
2993        }
2994    }
2995
2996    if(!memcmp(junkout, expect, expectlen*2))
2997    {
2998        log_verbose("Matches!\n");
2999        return TRUE;
3000    }
3001    else
3002    {
3003        log_err("String does not match. %s\n", gNuConvTestName);
3004        log_verbose("String does not match. %s\n", gNuConvTestName);
3005        log_err("Got:      ");
3006        printUSeqErr(junkout, expectlen);
3007        log_err("Expected: ");
3008        printUSeqErr(expect, expectlen);
3009        log_err("\n");
3010        return FALSE;
3011    }
3012}
3013
3014UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
3015                const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3016                const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3017{
3018
3019
3020    UErrorCode status = U_ZERO_ERROR;
3021    UConverter *conv = 0;
3022    char junkout[NEW_MAX_BUFFER]; /* FIX */
3023    int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3024    const UChar *src;
3025    char *end;
3026    char *targ;
3027    int32_t *offs;
3028    int i;
3029    int32_t  realBufferSize;
3030    char *realBufferEnd;
3031    const UChar *realSourceEnd;
3032    const UChar *sourceLimit;
3033    UBool checkOffsets = TRUE;
3034    UBool doFlush;
3035    char junk[9999];
3036    char offset_str[9999];
3037    char *p;
3038    UConverterFromUCallback oldAction = NULL;
3039    const void* oldContext = NULL;
3040
3041
3042    for(i=0;i<NEW_MAX_BUFFER;i++)
3043        junkout[i] = (char)0xF0;
3044    for(i=0;i<NEW_MAX_BUFFER;i++)
3045        junokout[i] = 0xFF;
3046    setNuConvTestName(codepage, "FROM");
3047
3048    log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
3049            gOutBufferSize);
3050
3051    conv = ucnv_open(codepage, &status);
3052    if(U_FAILURE(status))
3053    {
3054        log_data_err("Couldn't open converter %s\n",codepage);
3055        return TRUE; /* Because the err has already been logged. */
3056    }
3057
3058    log_verbose("Converter opened..\n");
3059
3060    /*----setting the callback routine----*/
3061    ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3062    if (U_FAILURE(status))
3063    {
3064        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3065    }
3066    /*------------------------*/
3067    /*setting the subChar*/
3068    if(mySubChar != NULL){
3069        ucnv_setSubstChars(conv, mySubChar, len, &status);
3070        if (U_FAILURE(status))  {
3071            log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3072        }
3073    }
3074    /*------------*/
3075
3076    src = source;
3077    targ = junkout;
3078    offs = junokout;
3079
3080    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3081    realBufferEnd = junkout + realBufferSize;
3082    realSourceEnd = source + sourceLen;
3083
3084    if ( gOutBufferSize != realBufferSize )
3085      checkOffsets = FALSE;
3086
3087    if( gInBufferSize != NEW_MAX_BUFFER )
3088      checkOffsets = FALSE;
3089
3090    do
3091    {
3092        end = nct_min(targ + gOutBufferSize, realBufferEnd);
3093        sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3094
3095        doFlush = (UBool)(sourceLimit == realSourceEnd);
3096
3097        if(targ == realBufferEnd)
3098        {
3099            log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3100            return FALSE;
3101        }
3102        log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3103
3104
3105        status = U_ZERO_ERROR;
3106
3107        ucnv_fromUnicode (conv,
3108                  (char **)&targ,
3109                  (const char *)end,
3110                  &src,
3111                  sourceLimit,
3112                  checkOffsets ? offs : NULL,
3113                  doFlush, /* flush if we're at the end of the input data */
3114                  &status);
3115    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3116
3117    /* allow failure codes for the stop callback */
3118    if(U_FAILURE(status) && status != expectedError)
3119    {
3120        log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3121        return FALSE;
3122    }
3123
3124    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3125        sourceLen, targ-junkout);
3126    if(getTestOption(VERBOSITY_OPTION))
3127    {
3128
3129        junk[0] = 0;
3130        offset_str[0] = 0;
3131        for(p = junkout;p<targ;p++)
3132        {
3133            sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3134            sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3135        }
3136
3137        log_verbose(junk);
3138        printSeq(expect, expectLen);
3139        if ( checkOffsets )
3140        {
3141            log_verbose("\nOffsets:");
3142            log_verbose(offset_str);
3143        }
3144        log_verbose("\n");
3145    }
3146    ucnv_close(conv);
3147
3148
3149    if(expectLen != targ-junkout)
3150    {
3151        log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3152        log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3153        printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3154        printSeqErr(expect, expectLen);
3155        return FALSE;
3156    }
3157
3158    if (checkOffsets && (expectOffsets != 0) )
3159    {
3160        log_verbose("comparing %d offsets..\n", targ-junkout);
3161        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3162            log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3163            log_err("Got Output : ");
3164            printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3165            log_err("Got Offsets:      ");
3166            for(p=junkout;p<targ;p++)
3167                log_err("%d,", junokout[p-junkout]);
3168            log_err("\n");
3169            log_err("Expected Offsets: ");
3170            for(i=0; i<(targ-junkout); i++)
3171                log_err("%d,", expectOffsets[i]);
3172            log_err("\n");
3173            return FALSE;
3174        }
3175    }
3176
3177    if(!memcmp(junkout, expect, expectLen))
3178    {
3179        log_verbose("String matches! %s\n", gNuConvTestName);
3180        return TRUE;
3181    }
3182    else
3183    {
3184        log_err("String does not match. %s\n", gNuConvTestName);
3185        log_err("source: ");
3186        printUSeqErr(source, sourceLen);
3187        log_err("Got:      ");
3188        printSeqErr((const uint8_t *)junkout, expectLen);
3189        log_err("Expected: ");
3190        printSeqErr(expect, expectLen);
3191        return FALSE;
3192    }
3193}
3194UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3195               const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3196               const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3197{
3198    UErrorCode status = U_ZERO_ERROR;
3199    UConverter *conv = 0;
3200    UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
3201    int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3202    const char *src;
3203    const char *realSourceEnd;
3204    const char *srcLimit;
3205    UChar *targ;
3206    UChar *end;
3207    int32_t *offs;
3208    int i;
3209    UBool   checkOffsets = TRUE;
3210    char junk[9999];
3211    char offset_str[9999];
3212    UChar *p;
3213    UConverterToUCallback oldAction = NULL;
3214    const void* oldContext = NULL;
3215
3216    int32_t   realBufferSize;
3217    UChar *realBufferEnd;
3218
3219
3220    for(i=0;i<NEW_MAX_BUFFER;i++)
3221        junkout[i] = 0xFFFE;
3222
3223    for(i=0;i<NEW_MAX_BUFFER;i++)
3224        junokout[i] = -1;
3225
3226    setNuConvTestName(codepage, "TO");
3227
3228    log_verbose("\n=========  %s\n", gNuConvTestName);
3229
3230    conv = ucnv_open(codepage, &status);
3231    if(U_FAILURE(status))
3232    {
3233        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3234        return TRUE;
3235    }
3236
3237    log_verbose("Converter opened..\n");
3238
3239    src = (const char *)source;
3240    targ = junkout;
3241    offs = junokout;
3242
3243    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3244    realBufferEnd = junkout + realBufferSize;
3245    realSourceEnd = src + sourcelen;
3246    /*----setting the callback routine----*/
3247    ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3248    if (U_FAILURE(status))
3249    {
3250        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3251    }
3252    /*-------------------------------------*/
3253    /*setting the subChar*/
3254    if(mySubChar != NULL){
3255        ucnv_setSubstChars(conv, mySubChar, len, &status);
3256        if (U_FAILURE(status))  {
3257            log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3258        }
3259    }
3260    /*------------*/
3261
3262
3263    if ( gOutBufferSize != realBufferSize )
3264        checkOffsets = FALSE;
3265
3266    if( gInBufferSize != NEW_MAX_BUFFER )
3267        checkOffsets = FALSE;
3268
3269    do
3270    {
3271        end = nct_min( targ + gOutBufferSize, realBufferEnd);
3272        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3273
3274        if(targ == realBufferEnd)
3275        {
3276            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3277            return FALSE;
3278        }
3279        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3280
3281
3282
3283        status = U_ZERO_ERROR;
3284
3285        ucnv_toUnicode (conv,
3286                &targ,
3287                end,
3288                (const char **)&src,
3289                (const char *)srcLimit,
3290                checkOffsets ? offs : NULL,
3291                (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3292                &status);
3293    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3294
3295    /* allow failure codes for the stop callback */
3296    if(U_FAILURE(status) && status!=expectedError)
3297    {
3298        log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3299        return FALSE;
3300    }
3301
3302    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3303        sourcelen, targ-junkout);
3304    if(getTestOption(VERBOSITY_OPTION))
3305    {
3306
3307        junk[0] = 0;
3308        offset_str[0] = 0;
3309
3310        for(p = junkout;p<targ;p++)
3311        {
3312            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3313            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3314        }
3315
3316        log_verbose(junk);
3317        printUSeq(expect, expectlen);
3318        if ( checkOffsets )
3319        {
3320            log_verbose("\nOffsets:");
3321            log_verbose(offset_str);
3322        }
3323        log_verbose("\n");
3324    }
3325    ucnv_close(conv);
3326
3327    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3328
3329    if (checkOffsets && (expectOffsets != 0))
3330    {
3331        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3332        {
3333            log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3334            log_err("Got offsets:      ");
3335            for(p=junkout;p<targ;p++)
3336                log_err("  %2d,", junokout[p-junkout]);
3337            log_err("\n");
3338            log_err("Expected offsets: ");
3339            for(i=0; i<(targ-junkout); i++)
3340                log_err("  %2d,", expectOffsets[i]);
3341            log_err("\n");
3342            log_err("Got output:       ");
3343            for(i=0; i<(targ-junkout); i++)
3344                log_err("0x%04x,", junkout[i]);
3345            log_err("\n");
3346            log_err("From source:      ");
3347            for(i=0; i<(src-(const char *)source); i++)
3348                log_err("  0x%02x,", (unsigned char)source[i]);
3349            log_err("\n");
3350        }
3351    }
3352
3353    if(!memcmp(junkout, expect, expectlen*2))
3354    {
3355        log_verbose("Matches!\n");
3356        return TRUE;
3357    }
3358    else
3359    {
3360        log_err("String does not match. %s\n", gNuConvTestName);
3361        log_verbose("String does not match. %s\n", gNuConvTestName);
3362        log_err("Got:      ");
3363        printUSeqErr(junkout, expectlen);
3364        log_err("Expected: ");
3365        printUSeqErr(expect, expectlen);
3366        log_err("\n");
3367        return FALSE;
3368    }
3369}
3370
3371static void TestCallBackFailure(void) {
3372    UErrorCode status = U_USELESS_COLLATOR_ERROR;
3373    ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3374    if (status != U_USELESS_COLLATOR_ERROR) {
3375        log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3376    }
3377    ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3378    if (status != U_USELESS_COLLATOR_ERROR) {
3379        log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3380    }
3381    ucnv_cbFromUWriteSub(NULL, -1, &status);
3382    if (status != U_USELESS_COLLATOR_ERROR) {
3383        log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3384    }
3385    ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3386    if (status != U_USELESS_COLLATOR_ERROR) {
3387        log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3388    }
3389}
3390