1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*
7********************************************************************************
8* File NCCBTST.C
9*
10* Modification History:
11*        Name                            Description
12*    Madhu Katragadda     7/21/1999      Testing error callback routines
13********************************************************************************
14*/
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <ctype.h>
19#include "cstring.h"
20#include "unicode/uloc.h"
21#include "unicode/ucnv.h"
22#include "unicode/ucnv_err.h"
23#include "cintltst.h"
24#include "unicode/utypes.h"
25#include "unicode/ustring.h"
26#include "nccbtst.h"
27#include "unicode/ucnv_cb.h"
/* Buffer size the *CallBack test entry points pass for the "large buffer" case. */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Each argument is parenthesized so that operands containing operators of
 * lower precedence than '<' (for example `a | b`) expand as intended.
 * The selected argument is still evaluated twice, so do not pass
 * expressions with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))

/* Element count of a real array; gives a wrong answer for a pointer. */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Buffer sizes of the test case in progress; reported by setNuConvTestName(). */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Description of the test case in progress; written by setNuConvTestName(). */
static char     gNuConvTestName[1024];
36
/*
 * Hands the first `len` bytes of `a` to log_verbose() as a brace-enclosed,
 * comma-separated list of two-digit upper-case hex values. A newline is
 * emitted before the opening brace and after the closing one.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
45
46static void printUSeq(const UChar* a, int len)
47{
48    int i=0;
49    log_verbose("{");
50    while (i<len)
51        log_verbose("  0x%04x, ", a[i++]);
52    log_verbose("}\n");
53}
54
/*
 * Writes the first `len` bytes of `a` to stderr as a brace-enclosed list of
 * two-digit lower-case hex values, followed by a newline.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "  0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
63
64static void printUSeqErr(const UChar* a, int len)
65{
66    int i=0;
67    fprintf(stderr, "{");
68    while (i<len)
69        fprintf(stderr, "0x%04x, ", a[i++]);
70    fprintf(stderr,"}\n");
71}
72
73static void setNuConvTestName(const char *codepage, const char *direction)
74{
75    sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
76            codepage,
77            direction,
78            (int)gInBufferSize,
79            (int)gOutBufferSize);
80}
81
82
83static void TestCallBackFailure(void);
84
85void addTestConvertErrorCallBack(TestNode** root);
86
87void addTestConvertErrorCallBack(TestNode** root)
88{
89    addTest(root, &TestSkipCallBack,  "tsconv/nccbtst/TestSkipCallBack");
90    addTest(root, &TestStopCallBack,  "tsconv/nccbtst/TestStopCallBack");
91    addTest(root, &TestSubCallBack,   "tsconv/nccbtst/TestSubCallBack");
92    /* BEGIN android-removed
93       To save space, Android does not build complete CJK conversion tables.
94       We skip the test here.
95    addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
96       END android-removed */
97
98#if !UCONFIG_NO_LEGACY_CONVERSION
99    addTest(root, &TestLegalAndOtherCallBack,  "tsconv/nccbtst/TestLegalAndOtherCallBack");
100    addTest(root, &TestSingleByteCallBack,  "tsconv/nccbtst/TestSingleByteCallBack");
101#endif
102
103    addTest(root, &TestCallBackFailure,  "tsconv/nccbtst/TestCallBackFailure");
104}
105
106static void TestSkipCallBack()
107{
108    TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
109    TestSkip(1,NEW_MAX_BUFFER);
110    TestSkip(1,1);
111    TestSkip(NEW_MAX_BUFFER, 1);
112}
113
114static void TestStopCallBack()
115{
116    TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
117    TestStop(1,NEW_MAX_BUFFER);
118    TestStop(1,1);
119    TestStop(NEW_MAX_BUFFER, 1);
120}
121
122static void TestSubCallBack()
123{
124    TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
125    TestSub(1,NEW_MAX_BUFFER);
126    TestSub(1,1);
127    TestSub(NEW_MAX_BUFFER, 1);
128
129#if !UCONFIG_NO_LEGACY_CONVERSION
130    TestEBCDIC_STATEFUL_Sub(1, 1);
131    TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
132    TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
133    TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
134#endif
135}
136
137static void TestSubWithValueCallBack()
138{
139    TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
140    TestSubWithValue(1,NEW_MAX_BUFFER);
141    TestSubWithValue(1,1);
142    TestSubWithValue(NEW_MAX_BUFFER, 1);
143}
144
145#if !UCONFIG_NO_LEGACY_CONVERSION
146static void TestLegalAndOtherCallBack()
147{
148    TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
149    TestLegalAndOthers(1,NEW_MAX_BUFFER);
150    TestLegalAndOthers(1,1);
151    TestLegalAndOthers(NEW_MAX_BUFFER, 1);
152}
153
154static void TestSingleByteCallBack()
155{
156    TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
157    TestSingleByte(1,NEW_MAX_BUFFER);
158    TestSingleByte(1,1);
159    TestSingleByte(NEW_MAX_BUFFER, 1);
160}
161#endif
162
163static void TestSkip(int32_t inputsize, int32_t outputsize)
164{
165    static const uint8_t expskipIBM_949[]= {
166        0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
167
168    static const uint8_t expskipIBM_943[] = {
169        0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
170
171    static const uint8_t expskipIBM_930[] = {
172        0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
173
174    gInBufferSize = inputsize;
175    gOutBufferSize = outputsize;
176
177    /*From Unicode*/
178    log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP  \n");
179
180#if !UCONFIG_NO_LEGACY_CONVERSION
181    {
182        static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
183        static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
184
185        static const int32_t  toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
186        static const int32_t  toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
187
188        if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
189                expskipIBM_949, sizeof(expskipIBM_949), "ibm-949",
190                UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
191            log_err("u-> ibm-949 with skip did not match.\n");
192        if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
193                expskipIBM_943, sizeof(expskipIBM_943), "ibm-943",
194                UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
195            log_err("u-> ibm-943 with skip did not match.\n");
196    }
197
198    {
199        static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
200        static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
201        static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
202
203        /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
204        if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR,
205                                   fromUBytes, sizeof(fromUBytes),
206                                   "ibm-930",
207                                   UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
208                                   NULL, 0)
209        ) {
210            log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
211        }
212    }
213#endif
214
215    {
216        static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
217        static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
218        static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
219
220        static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
221        static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
222        static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
223
224        /* US-ASCII */
225        if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
226                                   usasciiFromUBytes, sizeof(usasciiFromUBytes),
227                                   "US-ASCII",
228                                   UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
229                                   NULL, 0)
230        ) {
231            log_err("u->US-ASCII with skip did not match.\n");
232        }
233
234#if !UCONFIG_NO_LEGACY_CONVERSION
235        /* SBCS NLTC codepage 367 for US-ASCII */
236        if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
237                                   usasciiFromUBytes, sizeof(usasciiFromUBytes),
238                                   "ibm-367",
239                                   UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
240                                   NULL, 0)
241        ) {
242            log_err("u->ibm-367 with skip did not match.\n");
243        }
244#endif
245
246        /* ISO-Latin-1 */
247        if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
248                                   latin1FromUBytes, sizeof(latin1FromUBytes),
249                                   "LATIN_1",
250                                   UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
251                                   NULL, 0)
252        ) {
253            log_err("u->LATIN_1 with skip did not match.\n");
254        }
255
256#if !UCONFIG_NO_LEGACY_CONVERSION
257        /* windows-1252 */
258        if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
259                                   latin1FromUBytes, sizeof(latin1FromUBytes),
260                                   "windows-1252",
261                                   UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
262                                   NULL, 0)
263        ) {
264            log_err("u->windows-1252 with skip did not match.\n");
265        }
266    }
267
268    {
269        static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
270        static const uint8_t toIBM943[]= { 0x61, 0x61 };
271        static const int32_t offset[]= {0, 4};
272
273         /* EUC_JP*/
274        static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
275        static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
276            0x61, 0x8e, 0xe0,
277        };
278        static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
279
280        /*EUC_TW*/
281        static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
282        static const uint8_t to_euc_tw[]={
283            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
284            0x61, 0xe6, 0xca, 0x8a,
285        };
286        static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
287
288        /*ISO-2022-JP*/
289        static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
290        static const uint8_t to_iso_2022_jp[]={
291            0x41,
292            0x42,
293
294        };
295        static const int32_t from_iso_2022_jpOffs [] ={0,2};
296
297        /*ISO-2022-JP*/
298        UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
299        static const uint8_t to_iso_2022_jp2[]={
300            0x41,
301            0x43,
302
303        };
304        static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
305
306        /*ISO-2022-cn*/
307        static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
308        static const uint8_t to_iso_2022_cn[]={
309            0x41, 0x42
310        };
311        static const int32_t from_iso_2022_cnOffs [] ={
312            0, 2
313        };
314
315        /*ISO-2022-CN*/
316        static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
317        static const uint8_t to_iso_2022_cn1[]={
318            0x41, 0x43
319
320        };
321        static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
322
323        /*ISO-2022-kr*/
324        static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
325        static const uint8_t to_iso_2022_kr[]={
326            0x1b,   0x24,   0x29,   0x43,
327            0x41,
328            0x0e,   0x25,   0x50,
329            0x25,   0x50,
330            0x0f,   0x42,
331        };
332        static const int32_t from_iso_2022_krOffs [] ={
333            -1,-1,-1,-1,
334            0,
335            1,1,1,
336            3,3,
337            4,4
338        };
339
340        /*ISO-2022-kr*/
341        static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
342        static const uint8_t to_iso_2022_kr1[]={
343            0x1b,   0x24,   0x29,   0x43,
344            0x41,
345            0x0e,   0x25,   0x50,
346            0x25,   0x50,
347
348        };
349        static const int32_t from_iso_2022_krOffs1 [] ={
350            -1,-1,-1,-1,
351            0,
352            1,1,1,
353            3,3,
354
355        };
356        /* HZ encoding */
357        static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
358
359        static const uint8_t to_hz[]={
360            0x7e,   0x7d,   0x41,
361            0x7e,   0x7b,   0x26,   0x30,
362            0x26,   0x30,
363            0x7e,   0x7d,   0x42,
364
365        };
366        static const int32_t from_hzOffs [] ={
367            0,0,0,
368            1,1,1,1,
369            3,3,
370            4,4,4,4
371        };
372
373        static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
374
375        static const uint8_t to_hz1[]={
376            0x7e,   0x7d,   0x41,
377            0x7e,   0x7b,   0x26,   0x30,
378            0x26,   0x30,
379
380
381        };
382        static const int32_t from_hzOffs1 [] ={
383            0,0,0,
384            1,1,1,1,
385            3,3,
386
387        };
388
389#endif
390
391        static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
392
393        static const uint8_t to_SCSU[]={
394            0x41,
395            0x42
396
397
398        };
399        static const int32_t from_SCSUOffs [] ={
400            0,
401            2,
402
403        };
404
405#if !UCONFIG_NO_LEGACY_CONVERSION
406        /* ISCII */
407        static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
408        static const uint8_t to_iscii[]={
409            0x41,
410            0x42,
411        };
412        static const int32_t from_isciiOffs [] ={
413            0,2,
414
415        };
416        /*ISCII*/
417        static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
418        static const uint8_t to_iscii1[]={
419            0x44,
420            0x43,
421
422        };
423        static const int32_t from_isciiOffs1 [] ={0,2};
424
425        if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
426                toIBM943, sizeof(toIBM943), "ibm-943",
427                UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
428            log_err("u-> ibm-943 with skip did not match.\n");
429
430        if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
431                to_euc_jp, sizeof(to_euc_jp), "euc-jp",
432                UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
433            log_err("u-> euc-jp with skip did not match.\n");
434
435        if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
436                to_euc_tw, sizeof(to_euc_tw), "euc-tw",
437                UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
438            log_err("u-> euc-tw with skip did not match.\n");
439
440        /*iso_2022_jp*/
441        if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
442                to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
443                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
444            log_err("u-> iso-2022-jp with skip did not match.\n");
445
446        /* with context */
447        if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
448                to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
449                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
450            log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
451
452        /*iso_2022_cn*/
453        if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
454                to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
455                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
456            log_err("u-> iso-2022-cn with skip did not match.\n");
457        /*with context*/
458        if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]),
459                to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn",
460                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
461            log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
462
463        /*iso_2022_kr*/
464        if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
465                to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
466                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
467            log_err("u-> iso-2022-kr with skip did not match.\n");
468          /*with context*/
469        if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]),
470                to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr",
471                UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
472            log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
473
474        /*hz*/
475        if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
476                to_hz, sizeof(to_hz), "HZ",
477                UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
478            log_err("u-> HZ with skip did not match.\n");
479          /*with context*/
480        if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]),
481                to_hz1, sizeof(to_hz1), "hz",
482                UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
483            log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
484#endif
485
486        /*SCSU*/
487        if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
488                to_SCSU, sizeof(to_SCSU), "SCSU",
489                UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
490            log_err("u-> SCSU with skip did not match.\n");
491
492#if !UCONFIG_NO_LEGACY_CONVERSION
493        /*ISCII*/
494        if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
495                to_iscii, sizeof(to_iscii), "ISCII,version=0",
496                UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
497            log_err("u-> iscii with skip did not match.\n");
498        /*with context*/
499        if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]),
500                to_iscii1, sizeof(to_iscii1), "ISCII,version=0",
501                UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
502            log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
503#endif
504    }
505
506    log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
507    {
508        static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
509            0xFB, 0xEE, 0x28,       /* from source offset 0 */
510            0x24, 0x1E, 0x52,
511            0xB2,
512            0x20,
513            0xB3,
514            0xB1,
515            0x0D,
516            0x0A,
517
518            0x20,                   /* from 8 */
519            0x00,
520            0xD0, 0x6C,
521            0xB6,
522            0xD8, 0xA5,
523            0x20,
524            0x68,
525            0x59,
526
527            0xF9, 0x28,             /* from 16 */
528            0x6D,
529            0x20,
530            0x73,
531            0xE0, 0x2D,
532            0xDE, 0x43,
533            0xD0, 0x33,
534            0x20,
535
536            0xFA, 0x83,             /* from 24 */
537            0x25, 0x01,
538            0xFB, 0x16, 0x87,
539            0x4B, 0x16,
540            0x20,
541            0xE6, 0xBD,
542            0xEB, 0x5B,
543            0x4B, 0xCC,
544
545            0xF9, 0xA2,             /* from 32 */
546            0xFC, 0x10, 0x3E,
547            0xFE, 0x16, 0x3A, 0x8C,
548            0x20,
549            0xFC, 0x03, 0xAC,
550
551            0x01,                   /* from 41 */
552            0xDE, 0x83,
553            0x20,
554            0x09
555        };
556        static const UChar expected[]={
557            0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
558            0x0063, 0x0061, 0x000D, 0x000A,
559
560            0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
561            0x0930, 0x0020, 0x0918, 0x0909,
562
563            0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
564            0x4000, 0x4E00, 0x7777, 0x0020,
565
566            0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
567            0x0020, 0xD7A3, 0xDC00, 0xD800,
568
569            0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
570            0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
571
572            0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
573            0x0009
574        };
575        static const int32_t offsets[]={
576            0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
577            8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
578            16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
579            24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
580            32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
581            41, 42, 42, 43, 44
582        };
583
584        /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
585        if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
586                                 sampleText, sizeof(sampleText),
587                                 "BOCU-1",
588                                 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
589        ) {
590            log_err("u->BOCU-1 with skip did not match.\n");
591        }
592    }
593
594    log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
595    {
596        const uint8_t sampleText[]={
597            0x61,                               /* 'a' */
598            0xc4, 0xb5,                         /* U+0135 */
599            0xed, 0x80, 0xa0,                   /* Hangul U+d020 */
600            0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
601            0xee, 0x80, 0x80,                   /* PUA U+e000 */
602            0xed, 0xb0, 0x81,                   /* unpaired trail surrogate U+dc01 */
603            0x62,                               /* 'b' */
604            0xed, 0xa0, 0x81,                   /* unpaired lead surrogate U+d801 */
605            0xd0, 0x80                          /* U+0400 */
606        };
607        UChar expected[]={
608            0x0061,
609            0x0135,
610            0xd020,
611            0xd801, 0xdc01,
612            0xe000,
613            0xdc01,
614            0x0062,
615            0xd801,
616            0x0400
617        };
618        int32_t offsets[]={
619            0,
620            1, 1,
621            2, 2, 2,
622            3, 3, 3, 4, 4, 4,
623            5, 5, 5,
624            6, 6, 6,
625            7,
626            8, 8, 8,
627            9, 9
628        };
629
630        /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
631
632        /* without offsets */
633        if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
634                                 sampleText, sizeof(sampleText),
635                                 "CESU-8",
636                                 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
637        ) {
638            log_err("u->CESU-8 with skip did not match.\n");
639        }
640
641        /* with offsets */
642        if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
643                                 sampleText, sizeof(sampleText),
644                                 "CESU-8",
645                                 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
646        ) {
647            log_err("u->CESU-8 with skip did not match.\n");
648        }
649    }
650
651    /*to Unicode*/
652    log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP  \n");
653
654#if !UCONFIG_NO_LEGACY_CONVERSION
655    {
656
657        static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
658        static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
659        static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
660
661        static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5};
662        static const int32_t  fromIBM943Offs [] = { 0, 2, 4};
663        static const int32_t  fromIBM930Offs [] = { 1, 3, 5};
664
665        if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949),
666                 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949",
667                UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
668            log_err("ibm-949->u with skip did not match.\n");
669        if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943),
670                 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943",
671                UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
672            log_err("ibm-943->u with skip did not match.\n");
673
674
675        if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930),
676                 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
677                UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
678            log_err("ibm-930->u with skip did not match.\n");
679
680
681        if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930),
682                 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
683                UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
684            log_err("ibm-930->u with skip did not match.\n");
685    }
686#endif
687
688    {
689        static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
690        static const UChar usasciiToU[] = { 0x61, 0x31 };
691        static const int32_t usasciiToUOffsets[] = { 0, 2 };
692
693        static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
694        static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
695        static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
696
697        /* US-ASCII */
698        if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
699                                 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
700                                 "US-ASCII",
701                                 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
702                                 NULL, 0)
703        ) {
704            log_err("US-ASCII->u with skip did not match.\n");
705        }
706
707#if !UCONFIG_NO_LEGACY_CONVERSION
708        /* SBCS NLTC codepage 367 for US-ASCII */
709        if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
710                                 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
711                                 "ibm-367",
712                                 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
713                                 NULL, 0)
714        ) {
715            log_err("ibm-367->u with skip did not match.\n");
716        }
717#endif
718
719        /* ISO-Latin-1 */
720        if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
721                                 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
722                                 "LATIN_1",
723                                 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
724                                 NULL, 0)
725        ) {
726            log_err("LATIN_1->u with skip did not match.\n");
727        }
728
729#if !UCONFIG_NO_LEGACY_CONVERSION
730        /* windows-1252 */
731        if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
732                                 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
733                                 "windows-1252",
734                                 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
735                                 NULL, 0)
736        ) {
737            log_err("windows-1252->u with skip did not match.\n");
738        }
739#endif
740    }
741
742#if !UCONFIG_NO_LEGACY_CONVERSION
743    {
744        static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
745            0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
746        };
747        static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0x03b4
748        };
749        static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
750
751
752         /* euc-jp*/
753        static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
754            /* BEGIN android-changed */
755            /* Android uses a different EUC-JP table. We change this byte sequence,
756             * choosing one that is unassigned in both tables. */
757            0x8f, 0xa1, 0xa1,  /*unassigned*/
758            /* 0x8f, 0xda, 0xa1, */ /*unassigned*/
759            /* END android-changed */
760           0x8e, 0xe0,
761        };
762        static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
763        static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
764
765         /*EUC_TW*/
766        static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
767            0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
768           0xe6, 0xca, 0x8a,
769        };
770        static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
771        static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
772                /*iso-2022-jp*/
773        static const uint8_t sampleTxt_iso_2022_jp[]={
774            0x41,
775            0x1b,   0x24,   0x42,   0x2A, 0x44, /*unassigned*/
776             0x1b,   0x28,   0x42,   0x42,
777
778        };
779        static const UChar iso_2022_jptoUnicode[]={    0x41,0x42 };
780        static const int32_t from_iso_2022_jpOffs [] ={  0,9   };
781
782        /*iso-2022-cn*/
783        static const uint8_t sampleTxt_iso_2022_cn[]={
784            0x0f,   0x41,   0x44,
785            0x1B,   0x24,   0x29,   0x47,
786            0x0E,   0x40,   0x6f, /*unassigned*/
787            0x0f,   0x42,
788
789        };
790
791        static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x42 };
792        static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   11   };
793
794        /*iso-2022-kr*/
795        static const uint8_t sampleTxt_iso_2022_kr[]={
796          0x1b, 0x24, 0x29,  0x43,
797          0x41,
798          0x0E, 0x7f, 0x1E,
799          0x0e, 0x25, 0x50,
800          0x0f, 0x51,
801          0x42, 0x43,
802
803        };
804        static const UChar iso_2022_krtoUnicode[]={     0x41,0x03A0,0x51, 0x42,0x43};
805        static const int32_t from_iso_2022_krOffs [] ={  4,    9,    12,   13  , 14 };
806
807        /*hz*/
808        static const uint8_t sampleTxt_hz[]={
809            0x41,
810            0x7e,   0x7b,   0x26,   0x30,
811            0x7f,   0x1E, /*unassigned*/
812            0x26,   0x30,
813            0x7e,   0x7d,   0x42,
814            0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
815            0x7e,   0x7d,   0x42,
816        };
817        static const UChar hztoUnicode[]={
818            0x41,
819            0x03a0,
820            0x03A0,
821            0x42,
822            0x42,};
823
824        static const int32_t from_hzOffs [] ={0,3,7,11,18,  };
825
826        /*ISCII*/
827        static const uint8_t sampleTxt_iscii[]={
828            0x41,
829            0xa1,
830            0xEB,    /*unassigned*/
831            0x26,
832            0x30,
833            0xa2,
834            0xEC,    /*unassigned*/
835            0x42,
836        };
837        static const UChar isciitoUnicode[]={
838            0x41,
839            0x0901,
840            0x26,
841            0x30,
842            0x0902,
843            0x42,
844            };
845
846        static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
847
848        /*LMBCS*/
849        static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
850            0x12, 0x92, 0xa0, /*unassigned*/
851            0x12, 0x92, 0xA1,
852        };
853        static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
854        static const int32_t fromLMBCS[] = {0, 6};
855
856        if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
857             EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
858            UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
859        log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
860
861        if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
862             EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
863            UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
864        log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
865
866        if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
867                 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
868                UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
869            log_err("euc-jp->u with skip did not match.\n");
870
871
872
873        if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
874                 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
875                UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
876            log_err("euc-tw->u with skip did not match.\n");
877
878
879        if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
880                 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
881                UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
882            log_err("iso-2022-jp->u with skip did not match.\n");
883
884        if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
885                 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
886                UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
887            log_err("iso-2022-cn->u with skip did not match.\n");
888
889        if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
890                 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
891                UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
892            log_err("iso-2022-kr->u with skip did not match.\n");
893
894        if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
895                 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
896                UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
897            log_err("HZ->u with skip did not match.\n");
898
899        if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
900                 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
901                UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
902            log_err("iscii->u with skip did not match.\n");
903
904        if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS),
905                LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1",
906                UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
907            log_err("LMBCS->u with skip did not match.\n");
908
909    }
910#endif
911
912    log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
913    {
914        const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
915            0xe0, 0x80,  0x61,};
916        UChar    expected1[] = {  0x0031, 0x4e8c, 0x0061};
917        int32_t offsets1[] = {   0x0000, 0x0001, 0x0006};
918
919        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
920                 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
921                UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
922            log_err("utf8->u with skip did not match.\n");;
923    }
924
925    log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
926    {
927        const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
928        UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffe,0xfffe};
929        int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
930
931        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
932                 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
933                UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
934            log_err("scsu->u with skip did not match.\n");
935    }
936
937    log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
938    {
939        const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
940            0xFB, 0xEE, 0x28,       /* single-code point sequence at offset 0 */
941            0x24, 0x1E, 0x52,       /* 3 */
942            0xB2,                   /* 6 */
943            0x20,                   /* 7 */
944            0x40, 0x07,             /* 8 - wrong trail byte */
945            0xB3,                   /* 10 */
946            0xB1,                   /* 11 */
947            0xD0, 0x20,             /* 12 - wrong trail byte */
948            0x0D,                   /* 14 */
949            0x0A,                   /* 15 */
950            0x20,                   /* 16 */
951            0x00,                   /* 17 */
952            0xD0, 0x6C,             /* 18 */
953            0xB6,                   /* 20 */
954            0xD8, 0xA5,             /* 21 */
955            0x20,                   /* 23 */
956            0x68,                   /* 24 */
957            0x59,                   /* 25 */
958            0xF9, 0x28,             /* 26 */
959            0x6D,                   /* 28 */
960            0x20,                   /* 29 */
961            0x73,                   /* 30 */
962            0xE0, 0x2D,             /* 31 */
963            0xDE, 0x43,             /* 33 */
964            0xD0, 0x33,             /* 35 */
965            0x20,                   /* 37 */
966            0xFA, 0x83,             /* 38 */
967            0x25, 0x01,             /* 40 */
968            0xFB, 0x16, 0x87,       /* 42 */
969            0x4B, 0x16,             /* 45 */
970            0x20,                   /* 47 */
971            0xE6, 0xBD,             /* 48 */
972            0xEB, 0x5B,             /* 50 */
973            0x4B, 0xCC,             /* 52 */
974            0xF9, 0xA2,             /* 54 */
975            0xFC, 0x10, 0x3E,       /* 56 */
976            0xFE, 0x16, 0x3A, 0x8C, /* 59 */
977            0x20,                   /* 63 */
978            0xFC, 0x03, 0xAC,       /* 64 */
979            0xFF,                   /* 67 - FF just resets the state without encoding anything */
980            0x01,                   /* 68 */
981            0xDE, 0x83,             /* 69 */
982            0x20,                   /* 71 */
983            0x09                    /* 72 */
984        };
985        UChar expected[]={
986            0xFEFF, 0x0061, 0x0062, 0x0020,
987            0x0063, 0x0061, 0x000D, 0x000A,
988            0x0020, 0x0000, 0x00DF, 0x00E6,
989            0x0930, 0x0020, 0x0918, 0x0909,
990            0x3086, 0x304D, 0x0020, 0x3053,
991            0x4000, 0x4E00, 0x7777, 0x0020,
992            0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
993            0x0020, 0xD7A3, 0xDC00, 0xD800,
994            0xD800, 0xDC00, 0xD845, 0xDDDD,
995            0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
996            0xDFFF, 0x0001, 0x0E40, 0x0020,
997            0x0009
998        };
999        int32_t offsets[]={
1000            0, 3, 6, 7, /* skip 8, */
1001            10, 11, /* skip 12, */
1002            14, 15, 16, 17, 18,
1003            20, 21, 23, 24, 25, 26, 28, 29,
1004            30, 31, 33, 35, 37, 38,
1005            40, 42, 45, 47, 48,
1006            50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1007            63, 64, /* trail */ 64, /* reset only 67, */
1008            68, 69,
1009            71, 72
1010        };
1011
1012        if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1013                                 expected, ARRAY_LENGTH(expected), "BOCU-1",
1014                                 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1015        ) {
1016            log_err("BOCU-1->u with skip did not match.\n");
1017        }
1018    }
1019
1020    log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1021    {
1022        const uint8_t sampleText[]={
1023            0x61,                               /* 0  'a' */
1024            0xc0, 0x80,                         /* 1  non-shortest form */
1025            0xc4, 0xb5,                         /* 3  U+0135 */
1026            0xed, 0x80, 0xa0,                   /* 5  Hangul U+d020 */
1027            0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8  surrogate pair for U+10401 */
1028            0xee, 0x80, 0x80,                   /* 14 PUA U+e000 */
1029            0xed, 0xb0, 0x81,                   /* 17 unpaired trail surrogate U+dc01 */
1030            0xf0, 0x90, 0x80, 0x80,             /* 20 illegal 4-byte form for U+10000 */
1031            0x62,                               /* 24 'b' */
1032            0xed, 0xa0, 0x81,                   /* 25 unpaired lead surrogate U+d801 */
1033            0xed, 0xa0,                         /* 28 incomplete sequence */
1034            0xd0, 0x80                          /* 30 U+0400 */
1035        };
1036        UChar expected[]={
1037            0x0061,
1038            /* skip */
1039            0x0135,
1040            0xd020,
1041            0xd801, 0xdc01,
1042            0xe000,
1043            0xdc01,
1044            /* skip */
1045            0x0062,
1046            0xd801,
1047            0x0400
1048        };
1049        int32_t offsets[]={
1050            0,
1051            /* skip 1, */
1052            3,
1053            5,
1054            8, 11,
1055            14,
1056            17,
1057            /* skip 20, 20, */
1058            24,
1059            25,
1060            /* skip 28 */
1061            30
1062        };
1063
1064        /* without offsets */
1065        if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1066                                 expected, ARRAY_LENGTH(expected), "CESU-8",
1067                                 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1068        ) {
1069            log_err("CESU-8->u with skip did not match.\n");
1070        }
1071
1072        /* with offsets */
1073        if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1074                                 expected, ARRAY_LENGTH(expected), "CESU-8",
1075                                 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1076        ) {
1077            log_err("CESU-8->u with skip did not match.\n");
1078        }
1079    }
1080}
1081
1082static void TestStop(int32_t inputsize, int32_t outputsize)
1083{
1084    static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1085    static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1086
1087    static const uint8_t expstopIBM_949[]= {
1088        0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1089
1090    static const uint8_t expstopIBM_943[] = {
1091        0x9f, 0xaf, 0x9f, 0xb1};
1092
1093    static const uint8_t expstopIBM_930[] = {
1094        0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1095
1096    static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1097    static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1098    static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1099
1100
1101    static const int32_t  toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1102    static const int32_t  toIBM943Offsstop [] = { 0, 0, 1, 1};
1103    static const int32_t  toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1104
1105    static const int32_t  fromIBM949Offs [] = { 0, 1, 3};
1106    static const int32_t  fromIBM943Offs [] = { 0, 2};
1107    static const int32_t  fromIBM930Offs [] = { 1, 3};
1108
1109    gInBufferSize = inputsize;
1110    gOutBufferSize = outputsize;
1111
1112    /*From Unicode*/
1113
1114#if !UCONFIG_NO_LEGACY_CONVERSION
1115    if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1116            expstopIBM_949, sizeof(expstopIBM_949), "ibm-949",
1117            UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1118        log_err("u-> ibm-949 with stop did not match.\n");
1119    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1120            expstopIBM_943, sizeof(expstopIBM_943), "ibm-943",
1121            UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1122        log_err("u-> ibm-943 with stop did not match.\n");
1123    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1124            expstopIBM_930, sizeof(expstopIBM_930), "ibm-930",
1125            UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1126        log_err("u-> ibm-930 with stop did not match.\n");
1127
1128    log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP  \n");
1129    {
1130        static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1131        static const uint8_t toIBM943[]= { 0x61,};
1132        static const int32_t offset[]= {0,} ;
1133
1134         /*EUC_JP*/
1135        static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1136        static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1137        static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1138
1139        /*EUC_TW*/
1140        static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1141        static const uint8_t to_euc_tw[]={
1142            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1143        static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1144
1145        /*ISO-2022-JP*/
1146        static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1147        static const uint8_t to_iso_2022_jp[]={
1148             0x41,
1149
1150        };
1151        static const int32_t from_iso_2022_jpOffs [] ={0,};
1152
1153        /*ISO-2022-cn*/
1154        static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1155        static const uint8_t to_iso_2022_cn[]={
1156            0x41,
1157
1158        };
1159        static const int32_t from_iso_2022_cnOffs [] ={
1160            0,0,
1161            2,2,
1162        };
1163
1164        /*ISO-2022-kr*/
1165        static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1166        static const uint8_t to_iso_2022_kr[]={
1167            0x1b,   0x24,   0x29,   0x43,
1168            0x41,
1169            0x0e,   0x25,   0x50,
1170        };
1171        static const int32_t from_iso_2022_krOffs [] ={
1172            -1,-1,-1,-1,
1173             0,
1174            1,1,1,
1175        };
1176
1177        /* HZ encoding */
1178        static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1179
1180        static const uint8_t to_hz[]={
1181            0x7e,   0x7d, 0x41,
1182            0x7e,   0x7b,   0x26,   0x30,
1183
1184        };
1185        static const int32_t from_hzOffs [] ={
1186            0, 0,0,
1187            1,1,1,1,
1188        };
1189
1190        /*ISCII*/
1191        static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1192        static const uint8_t to_iscii[]={
1193            0x41,
1194        };
1195        static const int32_t from_isciiOffs [] ={
1196            0,
1197        };
1198
1199        if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1200                toIBM943, sizeof(toIBM943), "ibm-943",
1201                UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1202            log_err("u-> ibm-943 with stop did not match.\n");
1203
1204        if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1205                to_euc_jp, sizeof(to_euc_jp), "euc-jp",
1206                UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1207            log_err("u-> euc-jp with stop did not match.\n");
1208
1209        if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1210                to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1211                UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1212            log_err("u-> euc-tw with stop did not match.\n");
1213
1214        if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1215                to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1216                UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1217            log_err("u-> iso-2022-jp with stop did not match.\n");
1218
1219        if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1220                to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1221                UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1222            log_err("u-> iso-2022-jp with stop did not match.\n");
1223
1224        if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
1225                to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
1226                UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1227            log_err("u-> iso-2022-cn with stop did not match.\n");
1228
1229        if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
1230                to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
1231                UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1232            log_err("u-> iso-2022-kr with stop did not match.\n");
1233
1234        if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
1235                to_hz, sizeof(to_hz), "HZ",
1236                UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1237            log_err("u-> HZ with stop did not match.\n");\
1238
1239        if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
1240                to_iscii, sizeof(to_iscii), "ISCII,version=0",
1241                UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1242            log_err("u-> iscii with stop did not match.\n");
1243
1244
1245    }
1246#endif
1247
1248    log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1249    {
1250        static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1251
1252        static const uint8_t to_SCSU[]={
1253            0x41,
1254
1255        };
1256        int32_t from_SCSUOffs [] ={
1257            0,
1258
1259        };
1260        if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1261                to_SCSU, sizeof(to_SCSU), "SCSU",
1262                UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1263            log_err("u-> SCSU with skip did not match.\n");
1264
1265    }
1266
1267    /*to Unicode*/
1268
1269#if !UCONFIG_NO_LEGACY_CONVERSION
1270    if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949),
1271             IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949",
1272            UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1273        log_err("ibm-949->u with stop did not match.\n");
1274    if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943),
1275             IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943",
1276            UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1277        log_err("ibm-943->u with stop did not match.\n");
1278    if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930),
1279             IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930",
1280            UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1281        log_err("ibm-930->u with stop did not match.\n");
1282
1283    log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1284    {
1285
1286        static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1287            0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1288        };
1289        static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63 };
1290        static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1291
1292         /*EUC-JP*/
1293        static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1294            /* BEGIN android-changed */
1295            /* Android uses a different EUC-JP table. We change this byte sequence,
1296             * choosing one that is unassigned in both tables. */
1297            0x8f, 0xa1, 0xa1,  /*unassigned*/
1298            /* 0x8f, 0xda, 0xa1, */ /*unassigned*/
1299            /* END android-changed */
1300           0x8e, 0xe0,
1301        };
1302        static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1303        static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1304
1305          /*EUC_TW*/
1306        static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1307            0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1308           0xe6, 0xca, 0x8a,
1309        };
1310        UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1311        int32_t from_euc_twOffs [] ={ 0, 1, 3};
1312
1313
1314
1315         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1316             EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1317            UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1318        log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1319
1320        if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1321             euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1322            UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1323        log_err("euc-jp->u with stop did not match.\n");
1324
1325        if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1326                 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1327                UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1328            log_err("euc-tw->u with stop did not match.\n");
1329    }
1330#endif
1331
1332    log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1333    {
1334        static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1335            0xe0, 0x80,  0x61,};
1336        static const UChar    expected1[] = {  0x0031, 0x4e8c,};
1337        static const int32_t offsets1[] = {   0x0000, 0x0001};
1338
1339        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1340                 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1341                UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1342            log_err("utf8->u with stop did not match.\n");;
1343    }
1344    log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1345    {
1346        static const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1347        static const UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061};
1348        static const int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003};
1349
1350        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1351                 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1352                UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1353            log_err("scsu->u with stop did not match.\n");;
1354    }
1355
1356}
1357
1358static void TestSub(int32_t inputsize, int32_t outputsize)
1359{
1360    static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1361    static const UChar sampleText2[]=    { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1362
1363    static const uint8_t expsubIBM_949[] =
1364     { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1365
1366    static const uint8_t expsubIBM_943[] = {
1367        0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1368
1369    static const uint8_t expsubIBM_930[] = {
1370        0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1371
1372    static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1373    static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1374    static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1375
1376    static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1377    static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1378    static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1379
1380    static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1381    static const int32_t  fromIBM943Offs [] = { 0, 2, 4, 6 };
1382    static const int32_t  fromIBM930Offs [] = { 1, 3, 5, 7 };
1383
1384    gInBufferSize = inputsize;
1385    gOutBufferSize = outputsize;
1386
1387    /*from unicode*/
1388
1389#if !UCONFIG_NO_LEGACY_CONVERSION
1390    if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1391            expsubIBM_949, sizeof(expsubIBM_949), "ibm-949",
1392            UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1393        log_err("u-> ibm-949 with subst did not match.\n");
1394    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1395            expsubIBM_943, sizeof(expsubIBM_943), "ibm-943",
1396            UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1397        log_err("u-> ibm-943 with subst did not match.\n");
1398    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1399            expsubIBM_930, sizeof(expsubIBM_930), "ibm-930",
1400            UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1401        log_err("u-> ibm-930 with subst did not match.\n");
1402
1403    log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE  \n");
1404    {
1405        static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1406        static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1407        static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1408
1409
1410        /* EUC_JP*/
1411        static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1412        static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1413            0xf4, 0xfe, 0xf4, 0xfe,
1414            0x61, 0x8e, 0xe0,
1415        };
1416        static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1417
1418        /*EUC_TW*/
1419        static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1420        static const uint8_t to_euc_tw[]={
1421            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1422            0xfd, 0xfe, 0xfd, 0xfe,
1423            0x61, 0xe6, 0xca, 0x8a,
1424        };
1425
1426        static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1427
1428        if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1429                toIBM943, sizeof(toIBM943), "ibm-943",
1430                UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1431            log_err("u-> ibm-943 with substitute did not match.\n");
1432
1433        if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1434                to_euc_jp, sizeof(to_euc_jp), "euc-jp",
1435                UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1436            log_err("u-> euc-jp with substitute did not match.\n");
1437
1438        if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1439                to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1440                UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1441            log_err("u-> euc-tw with substitute did not match.\n");
1442    }
1443#endif
1444
1445    log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1446    {
1447        UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1448
1449        const uint8_t to_SCSU[]={
1450            0x41,
1451            0x0e, 0xff,0xfd,
1452            0x42
1453
1454
1455        };
1456        int32_t from_SCSUOffs [] ={
1457            0,
1458            1,1,1,
1459            2,
1460
1461        };
1462        const uint8_t to_SCSU_1[]={
1463            0x41,
1464
1465        };
1466        int32_t from_SCSUOffs_1 [] ={
1467            0,
1468
1469        };
1470        if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1471                to_SCSU, sizeof(to_SCSU), "SCSU",
1472                UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1473            log_err("u-> SCSU with substitute did not match.\n");
1474
1475        if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1476                to_SCSU_1, sizeof(to_SCSU_1), "SCSU",
1477                UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1478            log_err("u-> SCSU with substitute did not match.\n");
1479    }
1480
1481    log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1482    {
1483        static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1484        static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1485                           0xf0, 0x90, 0x90, 0x81,
1486                           0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1487                           0xef, 0xbf, 0xbf, 0x61,
1488
1489        };
1490        static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1491        if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]),
1492                expectedUTF8, sizeof(expectedUTF8), "utf8",
1493                UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1494            log_err("u-> utf8 with stop did not match.\n");
1495        }
1496    }
1497
1498    log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1499    {
1500        static const UChar in[]={ 0x0041, 0xfeff };
1501
1502        static const uint8_t out[]={
1503#if U_IS_BIG_ENDIAN
1504            0xfe, 0xff,
1505            0x00, 0x41,
1506            0xfe, 0xff
1507#else
1508            0xff, 0xfe,
1509            0x41, 0x00,
1510            0xff, 0xfe
1511#endif
1512        };
1513        static const int32_t offsets[]={
1514            -1, -1, 0, 0, 1, 1
1515        };
1516
1517        if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1518                                   out, sizeof(out), "UTF-16",
1519                                   UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1520        ) {
1521            log_err("u->UTF-16 with substitute did not match.\n");
1522        }
1523    }
1524
1525    log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1526    {
1527        static const UChar in[]={ 0x0041, 0xfeff };
1528
1529        static const uint8_t out[]={
1530#if U_IS_BIG_ENDIAN
1531            0x00, 0x00, 0xfe, 0xff,
1532            0x00, 0x00, 0x00, 0x41,
1533            0x00, 0x00, 0xfe, 0xff
1534#else
1535            0xff, 0xfe, 0x00, 0x00,
1536            0x41, 0x00, 0x00, 0x00,
1537            0xff, 0xfe, 0x00, 0x00
1538#endif
1539        };
1540        static const int32_t offsets[]={
1541            -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1542        };
1543
1544        if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1545                                   out, sizeof(out), "UTF-32",
1546                                   UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1547        ) {
1548            log_err("u->UTF-32 with substitute did not match.\n");
1549        }
1550    }
1551
1552    /*to unicode*/
1553
1554#if !UCONFIG_NO_LEGACY_CONVERSION
1555    if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949),
1556             IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949",
1557            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1558        log_err("ibm-949->u with substitute did not match.\n");
1559    if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943),
1560             IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943",
1561            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1562        log_err("ibm-943->u with substitute did not match.\n");
1563    if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930),
1564             IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930",
1565            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1566        log_err("ibm-930->u with substitute did not match.\n");
1567
1568    log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1569    {
1570
1571        const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1572            0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1573        };
1574        UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0xfffd, 0x03b4
1575        };
1576        int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1577
1578
1579        /* EUC_JP*/
1580        const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1581            /* BEGIN android-changed */
1582            /* Android uses a different EUC-JP table. We change this byte sequence,
1583             * choosing one that is unassigned in both tables. */
1584            0x8f, 0xa1, 0xa1,  /*unassigned*/
1585            /* 0x8f, 0xda, 0xa1, */ /*unassigned*/
1586            /* END android-changed */
1587           0x8e, 0xe0, 0x8a
1588        };
1589        UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1590        int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6,  9, 11 };
1591
1592        /*EUC_TW*/
1593        const uint8_t sampleTxt_euc_tw[]={
1594            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1595            0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1596            0xe6, 0xca, 0x8a,
1597        };
1598        UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1599        int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1600
1601
1602        if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1603           EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1604          UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1605            log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1606
1607
1608        if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1609           euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1610          UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1611            log_err("euc-jp->u with substitute did not match.\n");
1612
1613
1614        if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1615           euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1616          UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1617            log_err("euc-tw->u with substitute  did not match.\n");
1618
1619
1620        if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1621           euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"euc-jp",
1622          UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1623            log_err("euc-jp->u with substitute did not match.\n");
1624    }
1625#endif
1626
1627    log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1628    {
1629        const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1630            0xe0, 0x80,  0x61,};
1631        UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0x0061};
1632        int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0006};
1633
1634        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1635                 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1636                UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1637            log_err("utf8->u with substitute did not match.\n");;
1638    }
1639    log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1640    {
1641        const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1642        UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffd,0xfffd};
1643        int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
1644
1645        if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1646                 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1647                UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1648            log_err("scsu->u with stop did not match.\n");;
1649    }
1650
1651#if !UCONFIG_NO_LEGACY_CONVERSION
1652    log_verbose("Testing ibm-930 subchar/subchar1\n");
1653    {
1654        static const UChar u1[]={         0x6d63,           0x6d64,     0x6d65,     0x6d66,     0xdf };
1655        static const uint8_t s1[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1656        static const int32_t offsets1[]={ 0,    0,    0,    1,    1,    2,    2,    3,    3,    4,    4 };
1657
1658        static const UChar u2[]={         0x6d63,           0x6d64,     0xfffd,     0x6d66,     0x1a };
1659        static const uint8_t s2[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1660        static const int32_t offsets2[]={ 1,                3,          5,          7,          10 };
1661
1662        if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930",
1663                                   UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1664        ) {
1665            log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1666        }
1667
1668        if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930",
1669                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1670        ) {
1671            log_err("ibm-930->u subchar/subchar1 did not match.\n");
1672        }
1673    }
1674
1675    log_verbose("Testing GB 18030 with substitute callbacks\n");
1676    {
1677        static const UChar u2[]={
1678            0x24, 0x7f, 0x80,                   0x1f9,      0x20ac,     0x4e00,     0x9fa6,                 0xffff,                 0xd800, 0xdc00,         0xfffd,                 0xdbff, 0xdfff };
1679        static const uint8_t gb2[]={
1680            0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1681        static const int32_t offsets2[]={
1682            0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1683
1684        if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030",
1685                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1686        ) {
1687            log_err("gb18030->u with substitute did not match.\n");
1688        }
1689    }
1690#endif
1691
1692    log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1693    {
1694        static const uint8_t utf7[]={
1695         /* a~            a+AB~                           a+AB\x0c                        a+AB-                         a+AB.                         a+. */
1696            0x61, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x0c,   0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b,   0x2e
1697        };
1698        static const UChar unicode[]={
1699            0x61, 0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,           0x61,       0xfffd,     0x2e, 0x61, 0xfffd, 0x2e
1700        };
1701        static const int32_t offsets[]={
1702            0,    1,      2,          4,          6,      7,          9,          11,     12,         14,               17,         19,         21,   22,   23,     24
1703        };
1704
1705        if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7",
1706                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1707        ) {
1708            log_err("UTF-7->u with substitute did not match.\n");
1709        }
1710    }
1711
1712    log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1713    {
1714        static const uint8_t
1715            in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1716            in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1717            in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1718
1719        static const UChar
1720            out1[]={ 0x4e00, 0xfeff },
1721            out2[]={ 0x004e, 0xfffe },
1722            out3[]={ 0xfefd, 0x4e00, 0xfeff };
1723
1724        static const int32_t
1725            offsets1[]={ 2, 4 },
1726            offsets2[]={ 2, 4 },
1727            offsets3[]={ 0, 2, 4 };
1728
1729        if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16",
1730                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1731        ) {
1732            log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1733        }
1734
1735        if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16",
1736                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1737        ) {
1738            log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1739        }
1740
1741        if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16",
1742                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1743        ) {
1744            log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1745        }
1746    }
1747
1748    log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1749    {
1750        static const uint8_t
1751            in1[]={ 0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff },
1752            in2[]={ 0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00 },
1753            in3[]={ 0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01 },
1754            in4[]={ 0x00, 0x01, 0x02, 0x03,   0x00, 0x11, 0x12, 0x00,   0x00, 0x00, 0x4e, 0x00 };
1755
1756        static const UChar
1757            out1[]={ UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfeff },
1758            out2[]={ UTF16_LEAD(0x0f1000), UTF16_TRAIL(0x0f1000), 0xfffe },
1759            out3[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1760            out4[]={ UTF16_LEAD(0x10203), UTF16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1761
1762        static const int32_t
1763            offsets1[]={ 4, 4, 8 },
1764            offsets2[]={ 4, 4, 8 },
1765            offsets3[]={ 0, 4, 4, 8, 12 },
1766            offsets4[]={ 0, 0, 4, 8 };
1767
1768        if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32",
1769                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1770        ) {
1771            log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1772        }
1773
1774        if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32",
1775                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1776        ) {
1777            log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1778        }
1779
1780        if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32",
1781                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1782        ) {
1783            log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1784        }
1785
1786        if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32",
1787                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1788        ) {
1789            log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1790        }
1791    }
1792}
1793
1794static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1795{
1796    UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1797    UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1798
1799    const uint8_t expsubwvalIBM_949[]= {
1800        0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1801        0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1802
1803    const uint8_t expsubwvalIBM_943[]= {
1804        0x9f, 0xaf, 0x9f, 0xb1,
1805        0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1806
1807    const uint8_t expsubwvalIBM_930[] = {
1808        0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1809
1810    int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1811    int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1812    int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1813
1814    gInBufferSize = inputsize;
1815    gOutBufferSize = outputsize;
1816
1817    /*from Unicode*/
1818
1819#if !UCONFIG_NO_LEGACY_CONVERSION
1820    if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1821            expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949",
1822            UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1823        log_err("u-> ibm-949 with subst with value did not match.\n");
1824
1825    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1826            expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943",
1827            UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1828        log_err("u-> ibm-943 with sub with value did not match.\n");
1829
1830    if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1831            expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930",
1832            UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1833        log_err("u-> ibm-930 with subst with value did not match.\n");
1834
1835
1836    log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
1837    {
1838        static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1839        static const uint8_t toIBM943[]= { 0x61,
1840            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1841            0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1842            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1843            0x61 };
1844        static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1845
1846
1847         /* EUC_JP*/
1848        static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1849        static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1850            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1851            0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1852            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1853            0x61, 0x8e, 0xe0,
1854        };
1855        static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1856            3, 3, 3, 3, 3, 3,
1857            3, 3, 3, 3, 3, 3,
1858            5, 5, 5, 5, 5, 5,
1859            6, 7, 7,
1860        };
1861
1862        /*EUC_TW*/
1863        static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1864        static const uint8_t to_euc_tw[]={
1865            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1866            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1867            0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1868            0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1869            0x61, 0xe6, 0xca, 0x8a,
1870        };
1871        static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1872             3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1873             6, 7, 7, 8,
1874        };
1875        /*ISO-2022-JP*/
1876        static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1877        static const uint8_t to_iso_2022_jp1[]={
1878            0x1b,   0x24,   0x42,   0x21, 0x21,
1879            0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1880            0x1b,   0x24,   0x42,   0x21, 0x22,
1881            0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1882            0x42,
1883        };
1884
1885        static const int32_t from_iso_2022_jpOffs1 [] ={
1886            0,0,0,0,0,
1887            1,1,1,1,1,1,1,1,1,
1888            2,2,2,2,2,
1889            3,3,3,3,3,3,3,3,3,
1890            4,
1891        };
1892        /* surrogate pair*/
1893        static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1894        static const uint8_t to_iso_2022_jp2[]={
1895                                0x1b,   0x24,   0x42,   0x21,   0x21,
1896                                0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1897                                0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1898                                0x1b,   0x24,   0x42,   0x21,   0x22,
1899                                0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1900                                0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1901                                0x42,
1902                                };
1903        static const int32_t from_iso_2022_jpOffs2 [] ={
1904            0,0,0,0,0,
1905            1,1,1,1,1,1,1,1,1,
1906            1,1,1,1,1,1,
1907            3,3,3,3,3,
1908            4,4,4,4,4,4,4,4,4,
1909            4,4,4,4,4,4,
1910            6,
1911        };
1912
1913        /*ISO-2022-cn*/
1914        static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1915        static const uint8_t to_iso_2022_cn[]={
1916            0x41,
1917            0x25, 0x55,   0x33,   0x37,   0x31,   0x32,
1918            0x42,
1919        };
1920        static const int32_t from_iso_2022_cnOffs [] ={
1921            0,
1922            1,1,1,1,1,1,
1923            2,
1924        };
1925
1926        static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1927
1928        static const uint8_t to_iso_2022_cn4[]={
1929                             0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
1930                             0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1931                             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1932                             0x0e,   0x21,   0x22,
1933                             0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1934                             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1935                             0x42,
1936                             };
1937        static const int32_t from_iso_2022_cnOffs4 [] ={
1938            0,0,0,0,0,0,0,
1939            1,1,1,1,1,1,1,
1940            1,1,1,1,1,1,
1941            3,3,3,
1942            4,4,4,4,4,4,4,
1943            4,4,4,4,4,4,
1944            6
1945
1946        };
1947
1948        /*ISO-2022-kr*/
1949        static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1950        static const uint8_t to_iso_2022_kr2[]={
1951            0x1b,   0x24,   0x29,   0x43,
1952            0x41,
1953            0x0e,   0x25,   0x50,
1954            0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1955            0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1956            0x0e,   0x25,   0x50,
1957            0x0f,   0x42,
1958            0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1959            0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1960            0x43
1961        };
1962        static const int32_t from_iso_2022_krOffs2 [] ={
1963            -1,-1,-1,-1,
1964             0,
1965            1,1,1,
1966            2,2,2,2,2,2,2,
1967            2,2,2,2,2,2,
1968            4,4,4,
1969            5,5,
1970            6,6,6,6,6,6,
1971            6,6,6,6,6,6,
1972            8,
1973        };
1974
1975        static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1976        static const uint8_t to_iso_2022_kr[]={
1977            0x1b,   0x24,   0x29,   0x43,
1978            0x41,
1979            0x0e,   0x25,   0x50,
1980            0x0f,   0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1981            0x0e,   0x25,   0x50,
1982            0x0f,   0x42,
1983            0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1984            0x43
1985        };
1986
1987
1988        static const int32_t from_iso_2022_krOffs [] ={
1989            -1,-1,-1,-1,
1990             0,
1991            1,1,1,
1992            2,2,2,2,2,2,2,
1993            3,3,3,
1994            4,4,
1995            5,5,5,5,5,5,
1996            6,
1997        };
1998        /* HZ encoding */
1999        static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
2000
2001        static const uint8_t to_hz[]={
2002            0x7e,   0x7d,   0x41,
2003            0x7e,   0x7b,   0x26,   0x30,
2004            0x7e,   0x7d,   0x25,   0x55,   0x30,   0x36,   0x36,   0x32,  /*unassigned*/
2005            0x7e,   0x7b,   0x26,   0x30,
2006            0x7e,   0x7d,   0x42,
2007
2008        };
2009        static const int32_t from_hzOffs [] ={
2010            0,0,0,
2011            1,1,1,1,
2012            2,2,2,2,2,2,2,2,
2013            3,3,3,3,
2014            4,4,4
2015        };
2016
2017        static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2018        static const uint8_t to_hz2[]={
2019            0x7e,   0x7d,   0x41,
2020            0x7e,   0x7b,   0x26,   0x30,
2021            0x7e,   0x7d,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2022            0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2023            0x7e,   0x7b,   0x26,   0x30,
2024            0x7e,   0x7d,   0x42,
2025            0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2026            0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2027            0x43
2028        };
2029        static const int32_t from_hzOffs2 [] ={
2030            0,0,0,
2031            1,1,1,1,
2032            2,2,2,2,2,2,2,2,
2033            2,2,2,2,2,2,
2034            4,4,4,4,
2035            5,5,5,
2036            6,6,6,6,6,6,
2037            6,6,6,6,6,6,
2038            8,
2039        };
2040
2041                /*ISCII*/
2042        static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2043        static const uint8_t to_iscii[]={
2044            0x41,
2045            0xef,   0x42,   0xa1,
2046            0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2047            0xa2,
2048            0x42,
2049            0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2050            0x43
2051        };
2052
2053
2054        static const int32_t from_isciiOffs [] ={
2055            0,
2056            1,1,1,
2057            2,2,2,2,2,2,
2058            3,
2059            4,
2060            5,5,5,5,5,5,
2061            6,
2062        };
2063
2064        if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
2065                toIBM943, sizeof(toIBM943), "ibm-943",
2066                UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2067            log_err("u-> ibm-943 with subst with value did not match.\n");
2068
2069        if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
2070                to_euc_jp, sizeof(to_euc_jp), "euc-jp",
2071                UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2072            log_err("u-> euc-jp with subst with value did not match.\n");
2073
2074        if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
2075                to_euc_tw, sizeof(to_euc_tw), "euc-tw",
2076                UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2077            log_err("u-> euc-tw with subst with value did not match.\n");
2078
2079        if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2080                to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2081                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2082            log_err("u-> iso_2022_jp with subst with value did not match.\n");
2083
        /*
         * NOTE(review): this call passes exactly the same input, expected
         * bytes, converter name, callback and offsets as the call directly
         * before it. Confirm whether the repetition is intentional or whether
         * a different data set was meant to be exercised here.
         */
        if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
                to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
            log_err("u-> iso_2022_jp with subst with value did not match.\n");
2088
2089        if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
2090                to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
2091                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2092            log_err("u-> iso_2022_jp with subst with value did not match.\n");
2093        /*ESCAPE OPTIONS*/
2094        {
2095            /* surrogate pair*/
2096            static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2097            static const uint8_t to_iso_2022_jp3_v2[]={
2098                    0x1b,   0x24,   0x42,   0x21,   0x21,
2099                    0x1b,   0x28,   0x42,   0x26,   0x23,   0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2100
2101                    0x1b,   0x24,   0x42,   0x21,   0x22,
2102                    0x1b,   0x28,   0x42,   0x26,   0x23,  0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2103
2104                    0x42,
2105                    0x26,   0x23,   0x33,   0x36,   0x38,   0x39,   0x32,   0x3b,
2106                    };
2107
2108            static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2109                0,0,0,0,0,
2110                1,1,1,1,1,1,1,1,1,1,1,1,
2111
2112                3,3,3,3,3,
2113                4,4,4,4,4,4,4,4,4,4,4,4,
2114
2115                6,
2116                7,7,7,7,7,7,7,7,7
2117            };
2118
2119            if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]),
2120                    to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp",
2121                    UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2122                log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2123        }
2124        {
2125            static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2126            static const uint8_t to_iso_2022_cn5_v2[]={
2127                             0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2128                             0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2129                             0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2130                             0x0e,   0x21,   0x22,
2131                             0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2132                             0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2133                             0x42,
2134                             0x5c,   0x75,   0x30,   0x39,   0x30,   0x32,
2135                             };
2136            static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2137                0,0,0,0,0,0,0,
2138                1,1,1,1,1,1,1,
2139                1,1,1,1,1,1,
2140                3,3,3,
2141                4,4,4,4,4,4,4,
2142                4,4,4,4,4,4,
2143                6,
2144                7,7,7,7,7,7
2145            };
2146            if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]),
2147                to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn",
2148                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2149                log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2150
2151        }
2152        {
2153            static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2154            static const uint8_t to_iso_2022_cn6_v2[]={
2155                                0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2156                                0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2157                                0x0e,   0x21,   0x22,
2158                                0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2159                                0x42,
2160                                0x7b,   0x55,   0x2b,   0x30,   0x39,   0x30,   0x32,   0x7d
2161                             };
2162            static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2163                    0,  0,  0,  0,  0,  0,  0,
2164                    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2165                    3,  3,  3,
2166                    4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2167                    6,
2168                    7,  7,  7,  7,  7,  7,  7,  7,
2169            };
2170            if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]),
2171                to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn",
2172                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2173                log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2174
2175        }
2176        {
2177            static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2178            static const uint8_t to_iso_2022_cn7_v2[]={
2179                                0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2180                                0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2181                                0x0e,   0x21,   0x22,
2182                                0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2183                                0x42,   0x25,   0x55,   0x30,   0x39,   0x30,   0x32,
2184                            };
2185            static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2186                                0,  0,  0,  0,  0,  0,  0,
2187                                1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2188                                3,  3,  3,
2189                                4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2190                                6,
2191                                7,  7,  7,  7,  7,  7,
2192            };
2193            if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]),
2194                to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn",
2195                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2196                log_err("u-> iso-2022-cn with sub & K did not match.\n");
2197
2198        }
2199        {
2200            static const UChar iso_2022_cn_inputText8[]={
2201                                0x3000,
2202                                0xD84D, 0xDC56,
2203                                0x3001,
2204                                0xD84D, 0xDC56,
2205                                0xDBFF, 0xDFFF,
2206                                0x0042,
2207                                0x0902};
2208            static const uint8_t to_iso_2022_cn8_v2[]={
2209                                0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2210                                0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2211                                0x0e,   0x21,   0x22,
2212                                0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2213                                0x5c,   0x31,   0x30,   0x46,   0x46,   0x46,   0x46,   0x20,
2214                                0x42,
2215                                0x5c,   0x39,   0x30,   0x32,   0x20
2216                             };
2217            static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2218                    0,  0,  0,  0,  0,  0,  0,
2219                    1,  1,  1,  1,  1,  1,  1,  1,
2220                    3,  3,  3,
2221                    4,  4,  4,  4,  4,  4,  4,  4,
2222                    6,  6,  6,  6,  6,  6,  6,  6,
2223                    8,
2224                    9,  9,  9,  9,  9
2225            };
2226            if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]),
2227                to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn",
2228                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2229                log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2230
2231        }
2232        {
2233            static const uint8_t to_iso_2022_cn4_v3[]={
2234                            0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2235                            0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2236                            0x0e,   0x21,   0x22,
2237                            0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2238                            0x42
2239                             };
2240
2241
2242            static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2243                0,0,0,0,0,0,0,
2244                1,1,1,1,1,1,1,1,1,1,1,
2245
2246                3,3,3,
2247                4,4,4,4,4,4,4,4,4,4,4,
2248
2249                6
2250
2251            };
2252            if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2253                to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn",
2254                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2255            {
2256                log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2257            }
2258        }
2259        if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
2260                to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
2261                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2262            log_err("u-> iso_2022_cn with subst with value did not match.\n");
2263
2264        if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2265                to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn",
2266                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2267            log_err("u-> iso_2022_cn with subst with value did not match.\n");
2268        if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
2269                to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
2270                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2271            log_err("u-> iso_2022_kr with subst with value did not match.\n");
2272        if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]),
2273                to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr",
2274                UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2275            log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2276        if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
2277                to_hz, sizeof(to_hz), "HZ",
2278                UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2279            log_err("u-> hz with subst with value did not match.\n");
2280        if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]),
2281                to_hz2, sizeof(to_hz2), "HZ",
2282                UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2283            log_err("u-> hz with subst with value did not match.\n");
2284
2285        if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
2286                to_iscii, sizeof(to_iscii), "ISCII,version=0",
2287                UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2288            log_err("u-> iscii with subst with value did not match.\n");
2289    }
2290#endif
2291
2292    log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2293    /*to Unicode*/
2294    {
2295#if !UCONFIG_NO_LEGACY_CONVERSION
2296        static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2297            0x81, 0xad, /*unassigned*/
2298            0x89, 0xd3 };
2299        static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2300            0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2301            0x7B87};
2302        static const int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2303
2304        /* EUC_JP*/
2305        static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2306            /* BEGIN android-changed */
2307            /* Android uses a different EUC-JP table. We change this byte sequence,
2308             * choosing one that is unassigned in both tables. */
2309            0x8f, 0xa1, 0xa1,  /*unassigned*/
2310            /* 0x8f, 0xda, 0xa1, */ /*unassigned*/
2311            /* END android-changed */
2312           0x8e, 0xe0,
2313        };
2314        static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2315            /* BEGIN android-changed */
2316            /* Android uses a different EUC-JP table. We change the expected output,
2317             * matching the byte sequence modified above. */
2318            0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x41, 0x31, 0x25, 0x58, 0x41, 0x31,
2319            /* 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31, */
2320            /* END android-changed */
2321            0x00a2 };
2322        static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2323            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2324            9,
2325        };
2326
2327        /*EUC_TW*/
2328        static const uint8_t sampleTxt_euc_tw[]={
2329            0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2330            0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2331            0xe6, 0xca, 0x8a,
2332        };
2333        static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2334             0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2335             0x8706, 0x8a, };
2336        static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2337             7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2338             11, 13};
2339
2340        /*iso-2022-jp*/
2341        static const uint8_t sampleTxt_iso_2022_jp[]={
2342            0x1b,   0x28,   0x42,   0x41,
2343            0x1b,   0x24,   0x42,   0x2A, 0x44, /*unassigned*/
2344            0x1b,   0x28,   0x42,   0x42,
2345
2346        };
2347        static const UChar iso_2022_jptoUnicode[]={    0x41,0x25,0x58,0x32,0x41,0x25,0x58,0x34,0x34, 0x42 };
2348        static const int32_t from_iso_2022_jpOffs [] ={  3,   7,   7,   7,   7,   7,   7,   7,   7,    12   };
2349
2350        /*iso-2022-cn*/
2351        static const uint8_t sampleTxt_iso_2022_cn[]={
2352            0x0f,   0x41,   0x44,
2353            0x1B,   0x24,   0x29,   0x47,
2354            0x0E,   0x40,   0x6c, /*unassigned*/
2355            0x0f,   0x42,
2356
2357        };
2358        static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2359        static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   8,   8,   8,   8,   8,   8,   8,  8,    11   };
2360
2361        /*iso-2022-kr*/
2362        static const uint8_t sampleTxt_iso_2022_kr[]={
2363          0x1b, 0x24, 0x29,  0x43,
2364          0x41,
2365          0x0E, 0x7f, 0x1E,
2366          0x0e, 0x25, 0x50,
2367          0x0f, 0x51,
2368          0x42, 0x43,
2369
2370        };
2371        static const UChar iso_2022_krtoUnicode[]={     0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2372        static const int32_t from_iso_2022_krOffs [] ={  4,   6,   6,   6,   6,   6,   6,   6,   6,    9,    12,   13  , 14 };
2373
2374        /*hz*/
2375        static const uint8_t sampleTxt_hz[]={
2376            0x41,
2377            0x7e,   0x7b,   0x26,   0x30,
2378            0x7f,   0x1E, /*unassigned*/
2379            0x26,   0x30,
2380            0x7e,   0x7d,   0x42,
2381            0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
2382            0x7e,   0x7d,   0x42,
2383        };
2384        static const UChar hztoUnicode[]={
2385            0x41,
2386            0x03a0,
2387            0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2388            0x03A0,
2389            0x42,
2390            0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2391            0x42,};
2392
2393        static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18,  };
2394
2395
2396        /*iscii*/
2397        static const uint8_t sampleTxt_iscii[]={
2398            0x41,
2399            0x30,
2400            0xEB, /*unassigned*/
2401            0xa3,
2402            0x42,
2403            0xEC, /*unassigned*/
2404            0x42,
2405        };
2406        static const UChar isciitoUnicode[]={
2407            0x41,
2408            0x30,
2409            0x25,  0x58,  0x45, 0x42,
2410            0x0903,
2411            0x42,
2412            0x25,  0x58,  0x45, 0x43,
2413            0x42,};
2414
2415        static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6  };
2416#endif
2417
2418        /*UTF8*/
2419        static const uint8_t sampleTxtUTF8[]={
2420            0x20, 0x64, 0x50,
2421            0xC2, 0x7E, /* truncated char */
2422            0x20,
2423            0xE0, 0xB5, 0x7E, /* truncated char */
2424            0x40,
2425        };
2426        static const UChar UTF8ToUnicode[]={
2427            0x0020, 0x0064, 0x0050,
2428            0x0025, 0x0058, 0x0043, 0x0032, 0x007E,  /* \xC2~ */
2429            0x0020,
2430            0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2431            0x0040
2432        };
2433        static const int32_t fromUTF8[] = {
2434            0, 1, 2,
2435            3, 3, 3, 3, 4,
2436            5,
2437            6, 6, 6, 6, 6, 6, 6, 6, 8,
2438            9
2439        };
2440        static const UChar UTF8ToUnicodeXML_DEC[]={
2441            0x0020, 0x0064, 0x0050,
2442            0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E,  /* &#194;~ */
2443            0x0020,
2444            0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2445            0x0040
2446        };
2447        static const int32_t fromUTF8XML_DEC[] = {
2448            0, 1, 2,
2449            3, 3, 3, 3, 3, 3, 4,
2450            5,
2451            6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2452            9
2453        };
2454
2455
2456#if !UCONFIG_NO_LEGACY_CONVERSION
2457        if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU),
2458                 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
2459                UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2460            log_err("ibm-943->u with substitute with value did not match.\n");
2461
2462        if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP),
2463                 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"euc-jp",
2464                UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2465            log_err("euc-jp->u with substitute with value did not match.\n");
2466
2467        if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
2468                 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
2469                UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2470            log_err("euc-tw->u with substitute with value did not match.\n");
2471
2472        if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2473                 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2474                UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2475            log_err("iso-2022-jp->u with substitute with value did not match.\n");
2476
2477        if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2478                 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2479                UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2480            log_err("iso-2022-jp->u with substitute with value did not match.\n");
2481
2482        {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2483            {
2484                static const UChar iso_2022_jptoUnicodeDec[]={
2485                                                  0x0041,
2486                                                  0x0026,   0x0023,   0x0034,   0x0032,   0x003b,
2487                                                  0x0026,   0x0023,   0x0036,   0x0038,   0x003b,
2488                                                  0x0042 };
2489                static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12,  };
2490                if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2491                     iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2492                    UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2493                log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2494            }
2495            {
2496                static const UChar iso_2022_jptoUnicodeHex[]={
2497                                                  0x0041,
2498                                                  0x0026, 0x0023, 0x0078, 0x0032, 0x0041, 0x003b,
2499                                                  0x0026, 0x0023, 0x0078, 0x0034, 0x0034, 0x003b,
2500                                                  0x0042 };
2501                static const int32_t from_iso_2022_jpOffsHex [] ={  3,7,7,7,7,7,7,7,7,7,7,7,7,12   };
2502                if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2503                     iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2504                    UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2505                log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2506            }
2507            {
2508                static const UChar iso_2022_jptoUnicodeC[]={
2509                                                0x0041,
2510                                                0x005C, 0x0078, 0x0032, 0x0041,
2511                                                0x005C, 0x0078, 0x0034, 0x0034,
2512                                                0x0042 };
2513                int32_t from_iso_2022_jpOffsC [] ={  3,7,7,7,7,7,7,7,7,12   };
2514                if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2515                     iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2516                    UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2517                log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2518            }
2519        }
2520        if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
2521                 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
2522                UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2523            log_err("iso-2022-cn->u with substitute with value did not match.\n");
2524
2525        if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
2526                 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
2527                UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2528            log_err("iso-2022-kr->u with substitute with value did not match.\n");
2529
2530         if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
2531                 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
2532                UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2533            log_err("hz->u with substitute with value did not match.\n");
2534
2535         if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
2536                 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
2537                UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2538            log_err("ISCII ->u with substitute with value did not match.\n");
2539#endif
2540
2541        if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2542                UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8",
2543                UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2544            log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2545        if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2546                UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8",
2547                UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2548            log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2549    }
2550}
2551
2552#if !UCONFIG_NO_LEGACY_CONVERSION
2553static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2554{
2555    static const UChar    legalText[] =  { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2556    static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2557    static const int32_t  to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2558
2559
2560    static const uint8_t text943[] = {
2561        0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2562    static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,  0x5b57 };
2563    static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22,  0x5b57 };
2564    static const UChar toUnicode943stop[]= { 0x304b};
2565
2566    static const int32_t  fromIBM943Offssub[]  = { 0, 2, 3, 4, 5, 7 };
2567    static const int32_t  fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2568    static const int32_t  fromIBM943Offsstop[] = { 0};
2569
2570    gInBufferSize = inputsize;
2571    gOutBufferSize = outputsize;
2572    /*checking with a legal value*/
2573    if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]),
2574            templegal949, sizeof(templegal949), "ibm-949",
2575            UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2576        log_err("u-> ibm-949 with skip did not match.\n");
2577
2578    /*checking illegal value for ibm-943 with substitute*/
2579    if(!testConvertToUnicode(text943, sizeof(text943),
2580             toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2581            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2582        log_err("ibm-943->u with subst did not match.\n");
2583    /*checking illegal value for ibm-943 with skip */
2584    if(!testConvertToUnicode(text943, sizeof(text943),
2585             toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943",
2586            UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2587        log_err("ibm-943->u with skip did not match.\n");
2588
2589    /*checking illegal value for ibm-943 with stop */
2590    if(!testConvertToUnicode(text943, sizeof(text943),
2591             toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943",
2592            UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2593        log_err("ibm-943->u with stop did not match.\n");
2594
2595}
2596
2597static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2598{
2599    static const uint8_t sampleText[] = {
2600        0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2601        0xff, 0x32, 0x33};
2602    static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2603    static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2604    /*checking illegal value for ibm-943 with substitute*/
2605    gInBufferSize = inputsize;
2606    gOutBufferSize = outputsize;
2607
2608    if(!testConvertToUnicode(sampleText, sizeof(sampleText),
2609             toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2610            UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2611        log_err("ibm-943->u with subst did not match.\n");
2612}
2613
2614static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2615{
2616    /*EBCDIC_STATEFUL*/
2617    static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2618    static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2619    static const int32_t offset_930[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    4,    4,    5,    5    };
2620/*                              s     SO    doubl       SI    sng   s     SO    fe    fe    SI    s    */
2621
2622    /*EBCDIC_STATEFUL with subChar=3f*/
2623    static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2624    static const int32_t offset_930_subvaried[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    5    };
2625    static const char mySubChar[]={ 0x3f};
2626
2627    gInBufferSize = inputsize;
2628    gOutBufferSize = outputsize;
2629
2630    if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2631        toIBM930, sizeof(toIBM930), "ibm-930",
2632        UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2633            log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2634
2635    if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2636        toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930",
2637        UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2638            log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2639}
2640#endif
2641
2642UBool testConvertFromUnicode(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
2643                const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2644                const char *mySubChar, int8_t len)
2645{
2646
2647
2648    UErrorCode status = U_ZERO_ERROR;
2649    UConverter *conv = 0;
2650    char junkout[NEW_MAX_BUFFER]; /* FIX */
2651    int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2652    const UChar *src;
2653    char *end;
2654    char *targ;
2655    int32_t *offs;
2656    int i;
2657    int32_t  realBufferSize;
2658    char *realBufferEnd;
2659    const UChar *realSourceEnd;
2660    const UChar *sourceLimit;
2661    UBool checkOffsets = TRUE;
2662    UBool doFlush;
2663    char junk[9999];
2664    char offset_str[9999];
2665    char *p;
2666    UConverterFromUCallback oldAction = NULL;
2667    const void* oldContext = NULL;
2668
2669
2670    for(i=0;i<NEW_MAX_BUFFER;i++)
2671        junkout[i] = (char)0xF0;
2672    for(i=0;i<NEW_MAX_BUFFER;i++)
2673        junokout[i] = 0xFF;
2674    setNuConvTestName(codepage, "FROM");
2675
2676    log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
2677            gOutBufferSize);
2678
2679    conv = ucnv_open(codepage, &status);
2680    if(U_FAILURE(status))
2681    {
2682        log_data_err("Couldn't open converter %s\n",codepage);
2683        return TRUE;
2684    }
2685
2686    log_verbose("Converter opened..\n");
2687
2688    /*----setting the callback routine----*/
2689    ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2690    if (U_FAILURE(status))
2691    {
2692        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2693    }
2694    /*------------------------*/
2695    /*setting the subChar*/
2696    if(mySubChar != NULL){
2697        ucnv_setSubstChars(conv, mySubChar, len, &status);
2698        if (U_FAILURE(status))  {
2699            log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2700        }
2701    }
2702    /*------------*/
2703
2704    src = source;
2705    targ = junkout;
2706    offs = junokout;
2707
2708    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2709    realBufferEnd = junkout + realBufferSize;
2710    realSourceEnd = source + sourceLen;
2711
2712    if ( gOutBufferSize != realBufferSize )
2713      checkOffsets = FALSE;
2714
2715    if( gInBufferSize != NEW_MAX_BUFFER )
2716      checkOffsets = FALSE;
2717
2718    do
2719    {
2720        end = nct_min(targ + gOutBufferSize, realBufferEnd);
2721        sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2722
2723        doFlush = (UBool)(sourceLimit == realSourceEnd);
2724
2725        if(targ == realBufferEnd)
2726        {
2727            log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2728            return FALSE;
2729        }
2730        log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2731
2732
2733        status = U_ZERO_ERROR;
2734
2735        ucnv_fromUnicode (conv,
2736                  (char **)&targ,
2737                  (const char *)end,
2738                  &src,
2739                  sourceLimit,
2740                  checkOffsets ? offs : NULL,
2741                  doFlush, /* flush if we're at the end of the input data */
2742                  &status);
2743    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2744
2745
2746    if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2747        UChar errChars[50]; /* should be sufficient */
2748        int8_t errLen = 50;
2749        UErrorCode err = U_ZERO_ERROR;
2750        const UChar* limit= NULL;
2751        const UChar* start= NULL;
2752        ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2753        if(U_FAILURE(err)){
2754            log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2755        }
2756        /* src points to limit of invalid chars */
2757        limit = src;
2758        /* length of in invalid chars should be equal to returned length*/
2759        start = src - errLen;
2760        if(u_strncmp(errChars,start,errLen)!=0){
2761            log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2762        }
2763    }
2764    /* allow failure codes for the stop callback */
2765    if(U_FAILURE(status) &&
2766       (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2767    {
2768        log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2769        return FALSE;
2770    }
2771
2772    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2773        sourceLen, targ-junkout);
2774    if(getTestOption(VERBOSITY_OPTION))
2775    {
2776
2777        junk[0] = 0;
2778        offset_str[0] = 0;
2779        for(p = junkout;p<targ;p++)
2780        {
2781            sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2782            sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2783        }
2784
2785        log_verbose(junk);
2786        printSeq(expect, expectLen);
2787        if ( checkOffsets )
2788        {
2789            log_verbose("\nOffsets:");
2790            log_verbose(offset_str);
2791        }
2792        log_verbose("\n");
2793    }
2794    ucnv_close(conv);
2795
2796
2797    if(expectLen != targ-junkout)
2798    {
2799        log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2800        log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2801        printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2802        printSeqErr(expect, expectLen);
2803        return FALSE;
2804    }
2805
2806    if (checkOffsets && (expectOffsets != 0) )
2807    {
2808        log_verbose("comparing %d offsets..\n", targ-junkout);
2809        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2810            log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2811            log_err("Got Output : ");
2812            printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2813            log_err("Got Offsets:      ");
2814            for(p=junkout;p<targ;p++)
2815                log_err("%d,", junokout[p-junkout]);
2816            log_err("\n");
2817            log_err("Expected Offsets: ");
2818            for(i=0; i<(targ-junkout); i++)
2819                log_err("%d,", expectOffsets[i]);
2820            log_err("\n");
2821            return FALSE;
2822        }
2823    }
2824
2825    if(!memcmp(junkout, expect, expectLen))
2826    {
2827        log_verbose("String matches! %s\n", gNuConvTestName);
2828        return TRUE;
2829    }
2830    else
2831    {
2832        log_err("String does not match. %s\n", gNuConvTestName);
2833        log_err("source: ");
2834        printUSeqErr(source, sourceLen);
2835        log_err("Got:      ");
2836        printSeqErr((const uint8_t *)junkout, expectLen);
2837        log_err("Expected: ");
2838        printSeqErr(expect, expectLen);
2839        return FALSE;
2840    }
2841}
2842
2843UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2844               const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2845               const char *mySubChar, int8_t len)
2846{
2847    UErrorCode status = U_ZERO_ERROR;
2848    UConverter *conv = 0;
2849    UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
2850    int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2851    const char *src;
2852    const char *realSourceEnd;
2853    const char *srcLimit;
2854    UChar *targ;
2855    UChar *end;
2856    int32_t *offs;
2857    int i;
2858    UBool   checkOffsets = TRUE;
2859    char junk[9999];
2860    char offset_str[9999];
2861    UChar *p;
2862    UConverterToUCallback oldAction = NULL;
2863    const void* oldContext = NULL;
2864
2865    int32_t   realBufferSize;
2866    UChar *realBufferEnd;
2867
2868
2869    for(i=0;i<NEW_MAX_BUFFER;i++)
2870        junkout[i] = 0xFFFE;
2871
2872    for(i=0;i<NEW_MAX_BUFFER;i++)
2873        junokout[i] = -1;
2874
2875    setNuConvTestName(codepage, "TO");
2876
2877    log_verbose("\n=========  %s\n", gNuConvTestName);
2878
2879    conv = ucnv_open(codepage, &status);
2880    if(U_FAILURE(status))
2881    {
2882        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2883        return TRUE;
2884    }
2885
2886    log_verbose("Converter opened..\n");
2887
2888    src = (const char *)source;
2889    targ = junkout;
2890    offs = junokout;
2891
2892    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2893    realBufferEnd = junkout + realBufferSize;
2894    realSourceEnd = src + sourcelen;
2895    /*----setting the callback routine----*/
2896    ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2897    if (U_FAILURE(status))
2898    {
2899        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2900    }
2901    /*-------------------------------------*/
2902    /*setting the subChar*/
2903    if(mySubChar != NULL){
2904        ucnv_setSubstChars(conv, mySubChar, len, &status);
2905        if (U_FAILURE(status))  {
2906            log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2907        }
2908    }
2909    /*------------*/
2910
2911
2912    if ( gOutBufferSize != realBufferSize )
2913        checkOffsets = FALSE;
2914
2915    if( gInBufferSize != NEW_MAX_BUFFER )
2916        checkOffsets = FALSE;
2917
2918    do
2919    {
2920        end = nct_min( targ + gOutBufferSize, realBufferEnd);
2921        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2922
2923        if(targ == realBufferEnd)
2924        {
2925            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2926            return FALSE;
2927        }
2928        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2929
2930
2931
2932        status = U_ZERO_ERROR;
2933
2934        ucnv_toUnicode (conv,
2935                &targ,
2936                end,
2937                (const char **)&src,
2938                (const char *)srcLimit,
2939                checkOffsets ? offs : NULL,
2940                (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2941                &status);
2942    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2943
2944    if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2945        char errChars[50]; /* should be sufficient */
2946        int8_t errLen = 50;
2947        UErrorCode err = U_ZERO_ERROR;
2948        const char* limit= NULL;
2949        const char* start= NULL;
2950        ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2951        if(U_FAILURE(err)){
2952            log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2953        }
2954        /* src points to limit of invalid chars */
2955        limit = src;
2956        /* length of in invalid chars should be equal to returned length*/
2957        start = src - errLen;
2958        if(uprv_strncmp(errChars,start,errLen)!=0){
2959            log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2960        }
2961    }
2962    /* allow failure codes for the stop callback */
2963    if(U_FAILURE(status) &&
2964       (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2965    {
2966        log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2967        return FALSE;
2968    }
2969
2970    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2971        sourcelen, targ-junkout);
2972    if(getTestOption(VERBOSITY_OPTION))
2973    {
2974
2975        junk[0] = 0;
2976        offset_str[0] = 0;
2977
2978        for(p = junkout;p<targ;p++)
2979        {
2980            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2981            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2982        }
2983
2984        log_verbose(junk);
2985        printUSeq(expect, expectlen);
2986        if ( checkOffsets )
2987        {
2988            log_verbose("\nOffsets:");
2989            log_verbose(offset_str);
2990        }
2991        log_verbose("\n");
2992    }
2993    ucnv_close(conv);
2994
2995    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2996
2997    if (checkOffsets && (expectOffsets != 0))
2998    {
2999        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3000        {
3001            log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3002            log_err("Got offsets:      ");
3003            for(p=junkout;p<targ;p++)
3004                log_err("  %2d,", junokout[p-junkout]);
3005            log_err("\n");
3006            log_err("Expected offsets: ");
3007            for(i=0; i<(targ-junkout); i++)
3008                log_err("  %2d,", expectOffsets[i]);
3009            log_err("\n");
3010            log_err("Got output:       ");
3011            for(i=0; i<(targ-junkout); i++)
3012                log_err("0x%04x,", junkout[i]);
3013            log_err("\n");
3014            log_err("From source:      ");
3015            for(i=0; i<(src-(const char *)source); i++)
3016                log_err("  0x%02x,", (unsigned char)source[i]);
3017            log_err("\n");
3018        }
3019    }
3020
3021    if(!memcmp(junkout, expect, expectlen*2))
3022    {
3023        log_verbose("Matches!\n");
3024        return TRUE;
3025    }
3026    else
3027    {
3028        log_err("String does not match. %s\n", gNuConvTestName);
3029        log_verbose("String does not match. %s\n", gNuConvTestName);
3030        log_err("Got:      ");
3031        printUSeqErr(junkout, expectlen);
3032        log_err("Expected: ");
3033        printUSeqErr(expect, expectlen);
3034        log_err("\n");
3035        return FALSE;
3036    }
3037}
3038
3039UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
3040                const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3041                const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3042{
3043
3044
3045    UErrorCode status = U_ZERO_ERROR;
3046    UConverter *conv = 0;
3047    char junkout[NEW_MAX_BUFFER]; /* FIX */
3048    int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3049    const UChar *src;
3050    char *end;
3051    char *targ;
3052    int32_t *offs;
3053    int i;
3054    int32_t  realBufferSize;
3055    char *realBufferEnd;
3056    const UChar *realSourceEnd;
3057    const UChar *sourceLimit;
3058    UBool checkOffsets = TRUE;
3059    UBool doFlush;
3060    char junk[9999];
3061    char offset_str[9999];
3062    char *p;
3063    UConverterFromUCallback oldAction = NULL;
3064    const void* oldContext = NULL;
3065
3066
3067    for(i=0;i<NEW_MAX_BUFFER;i++)
3068        junkout[i] = (char)0xF0;
3069    for(i=0;i<NEW_MAX_BUFFER;i++)
3070        junokout[i] = 0xFF;
3071    setNuConvTestName(codepage, "FROM");
3072
3073    log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
3074            gOutBufferSize);
3075
3076    conv = ucnv_open(codepage, &status);
3077    if(U_FAILURE(status))
3078    {
3079        log_data_err("Couldn't open converter %s\n",codepage);
3080        return TRUE; /* Because the err has already been logged. */
3081    }
3082
3083    log_verbose("Converter opened..\n");
3084
3085    /*----setting the callback routine----*/
3086    ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3087    if (U_FAILURE(status))
3088    {
3089        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3090    }
3091    /*------------------------*/
3092    /*setting the subChar*/
3093    if(mySubChar != NULL){
3094        ucnv_setSubstChars(conv, mySubChar, len, &status);
3095        if (U_FAILURE(status))  {
3096            log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3097        }
3098    }
3099    /*------------*/
3100
3101    src = source;
3102    targ = junkout;
3103    offs = junokout;
3104
3105    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3106    realBufferEnd = junkout + realBufferSize;
3107    realSourceEnd = source + sourceLen;
3108
3109    if ( gOutBufferSize != realBufferSize )
3110      checkOffsets = FALSE;
3111
3112    if( gInBufferSize != NEW_MAX_BUFFER )
3113      checkOffsets = FALSE;
3114
3115    do
3116    {
3117        end = nct_min(targ + gOutBufferSize, realBufferEnd);
3118        sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3119
3120        doFlush = (UBool)(sourceLimit == realSourceEnd);
3121
3122        if(targ == realBufferEnd)
3123        {
3124            log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3125            return FALSE;
3126        }
3127        log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3128
3129
3130        status = U_ZERO_ERROR;
3131
3132        ucnv_fromUnicode (conv,
3133                  (char **)&targ,
3134                  (const char *)end,
3135                  &src,
3136                  sourceLimit,
3137                  checkOffsets ? offs : NULL,
3138                  doFlush, /* flush if we're at the end of the input data */
3139                  &status);
3140    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3141
3142    /* allow failure codes for the stop callback */
3143    if(U_FAILURE(status) && status != expectedError)
3144    {
3145        log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3146        return FALSE;
3147    }
3148
3149    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3150        sourceLen, targ-junkout);
3151    if(getTestOption(VERBOSITY_OPTION))
3152    {
3153
3154        junk[0] = 0;
3155        offset_str[0] = 0;
3156        for(p = junkout;p<targ;p++)
3157        {
3158            sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3159            sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3160        }
3161
3162        log_verbose(junk);
3163        printSeq(expect, expectLen);
3164        if ( checkOffsets )
3165        {
3166            log_verbose("\nOffsets:");
3167            log_verbose(offset_str);
3168        }
3169        log_verbose("\n");
3170    }
3171    ucnv_close(conv);
3172
3173
3174    if(expectLen != targ-junkout)
3175    {
3176        log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3177        log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3178        printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3179        printSeqErr(expect, expectLen);
3180        return FALSE;
3181    }
3182
3183    if (checkOffsets && (expectOffsets != 0) )
3184    {
3185        log_verbose("comparing %d offsets..\n", targ-junkout);
3186        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3187            log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3188            log_err("Got Output : ");
3189            printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3190            log_err("Got Offsets:      ");
3191            for(p=junkout;p<targ;p++)
3192                log_err("%d,", junokout[p-junkout]);
3193            log_err("\n");
3194            log_err("Expected Offsets: ");
3195            for(i=0; i<(targ-junkout); i++)
3196                log_err("%d,", expectOffsets[i]);
3197            log_err("\n");
3198            return FALSE;
3199        }
3200    }
3201
3202    if(!memcmp(junkout, expect, expectLen))
3203    {
3204        log_verbose("String matches! %s\n", gNuConvTestName);
3205        return TRUE;
3206    }
3207    else
3208    {
3209        log_err("String does not match. %s\n", gNuConvTestName);
3210        log_err("source: ");
3211        printUSeqErr(source, sourceLen);
3212        log_err("Got:      ");
3213        printSeqErr((const uint8_t *)junkout, expectLen);
3214        log_err("Expected: ");
3215        printSeqErr(expect, expectLen);
3216        return FALSE;
3217    }
3218}
3219UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3220               const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3221               const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3222{
3223    UErrorCode status = U_ZERO_ERROR;
3224    UConverter *conv = 0;
3225    UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
3226    int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3227    const char *src;
3228    const char *realSourceEnd;
3229    const char *srcLimit;
3230    UChar *targ;
3231    UChar *end;
3232    int32_t *offs;
3233    int i;
3234    UBool   checkOffsets = TRUE;
3235    char junk[9999];
3236    char offset_str[9999];
3237    UChar *p;
3238    UConverterToUCallback oldAction = NULL;
3239    const void* oldContext = NULL;
3240
3241    int32_t   realBufferSize;
3242    UChar *realBufferEnd;
3243
3244
3245    for(i=0;i<NEW_MAX_BUFFER;i++)
3246        junkout[i] = 0xFFFE;
3247
3248    for(i=0;i<NEW_MAX_BUFFER;i++)
3249        junokout[i] = -1;
3250
3251    setNuConvTestName(codepage, "TO");
3252
3253    log_verbose("\n=========  %s\n", gNuConvTestName);
3254
3255    conv = ucnv_open(codepage, &status);
3256    if(U_FAILURE(status))
3257    {
3258        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3259        return TRUE;
3260    }
3261
3262    log_verbose("Converter opened..\n");
3263
3264    src = (const char *)source;
3265    targ = junkout;
3266    offs = junokout;
3267
3268    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3269    realBufferEnd = junkout + realBufferSize;
3270    realSourceEnd = src + sourcelen;
3271    /*----setting the callback routine----*/
3272    ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3273    if (U_FAILURE(status))
3274    {
3275        log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3276    }
3277    /*-------------------------------------*/
3278    /*setting the subChar*/
3279    if(mySubChar != NULL){
3280        ucnv_setSubstChars(conv, mySubChar, len, &status);
3281        if (U_FAILURE(status))  {
3282            log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3283        }
3284    }
3285    /*------------*/
3286
3287
3288    if ( gOutBufferSize != realBufferSize )
3289        checkOffsets = FALSE;
3290
3291    if( gInBufferSize != NEW_MAX_BUFFER )
3292        checkOffsets = FALSE;
3293
3294    do
3295    {
3296        end = nct_min( targ + gOutBufferSize, realBufferEnd);
3297        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3298
3299        if(targ == realBufferEnd)
3300        {
3301            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3302            return FALSE;
3303        }
3304        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3305
3306
3307
3308        status = U_ZERO_ERROR;
3309
3310        ucnv_toUnicode (conv,
3311                &targ,
3312                end,
3313                (const char **)&src,
3314                (const char *)srcLimit,
3315                checkOffsets ? offs : NULL,
3316                (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3317                &status);
3318    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3319
3320    /* allow failure codes for the stop callback */
3321    if(U_FAILURE(status) && status!=expectedError)
3322    {
3323        log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3324        return FALSE;
3325    }
3326
3327    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3328        sourcelen, targ-junkout);
3329    if(getTestOption(VERBOSITY_OPTION))
3330    {
3331
3332        junk[0] = 0;
3333        offset_str[0] = 0;
3334
3335        for(p = junkout;p<targ;p++)
3336        {
3337            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3338            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3339        }
3340
3341        log_verbose(junk);
3342        printUSeq(expect, expectlen);
3343        if ( checkOffsets )
3344        {
3345            log_verbose("\nOffsets:");
3346            log_verbose(offset_str);
3347        }
3348        log_verbose("\n");
3349    }
3350    ucnv_close(conv);
3351
3352    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3353
3354    if (checkOffsets && (expectOffsets != 0))
3355    {
3356        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3357        {
3358            log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3359            log_err("Got offsets:      ");
3360            for(p=junkout;p<targ;p++)
3361                log_err("  %2d,", junokout[p-junkout]);
3362            log_err("\n");
3363            log_err("Expected offsets: ");
3364            for(i=0; i<(targ-junkout); i++)
3365                log_err("  %2d,", expectOffsets[i]);
3366            log_err("\n");
3367            log_err("Got output:       ");
3368            for(i=0; i<(targ-junkout); i++)
3369                log_err("0x%04x,", junkout[i]);
3370            log_err("\n");
3371            log_err("From source:      ");
3372            for(i=0; i<(src-(const char *)source); i++)
3373                log_err("  0x%02x,", (unsigned char)source[i]);
3374            log_err("\n");
3375        }
3376    }
3377
3378    if(!memcmp(junkout, expect, expectlen*2))
3379    {
3380        log_verbose("Matches!\n");
3381        return TRUE;
3382    }
3383    else
3384    {
3385        log_err("String does not match. %s\n", gNuConvTestName);
3386        log_verbose("String does not match. %s\n", gNuConvTestName);
3387        log_err("Got:      ");
3388        printUSeqErr(junkout, expectlen);
3389        log_err("Expected: ");
3390        printUSeqErr(expect, expectlen);
3391        log_err("\n");
3392        return FALSE;
3393    }
3394}
3395
3396static void TestCallBackFailure(void) {
3397    UErrorCode status = U_USELESS_COLLATOR_ERROR;
3398    ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3399    if (status != U_USELESS_COLLATOR_ERROR) {
3400        log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3401    }
3402    ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3403    if (status != U_USELESS_COLLATOR_ERROR) {
3404        log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3405    }
3406    ucnv_cbFromUWriteSub(NULL, -1, &status);
3407    if (status != U_USELESS_COLLATOR_ERROR) {
3408        log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3409    }
3410    ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3411    if (status != U_USELESS_COLLATOR_ERROR) {
3412        log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3413    }
3414}
3415