1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File nucnvtst.c
9*
10* Modification History:
11*        Name                     Description
12*    Steven R. Loomis     7/8/1999      Adding input buffer test
13********************************************************************************
14*/
15#include <stdio.h>
16#include "cstring.h"
17#include "unicode/uloc.h"
18#include "unicode/ucnv.h"
19#include "unicode/ucnv_err.h"
20#include "unicode/ucnv_cb.h"
21#include "cintltst.h"
22#include "unicode/utypes.h"
23#include "unicode/ustring.h"
24#include "unicode/ucol.h"
25#include "unicode/utf16.h"
26#include "cmemory.h"
27#include "nucnvtst.h"
28
29static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
30static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
31#if !UCONFIG_NO_COLLATION
32static void TestJitterbug981(void);
33#endif
34#if !UCONFIG_NO_LEGACY_CONVERSION
35static void TestJitterbug1293(void);
36#endif
37static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
38static void TestConverterTypesAndStarters(void);
39static void TestAmbiguous(void);
40static void TestSignatureDetection(void);
41static void TestUTF7(void);
42static void TestIMAP(void);
43static void TestUTF8(void);
44static void TestCESU8(void);
45static void TestUTF16(void);
46static void TestUTF16BE(void);
47static void TestUTF16LE(void);
48static void TestUTF32(void);
49static void TestUTF32BE(void);
50static void TestUTF32LE(void);
51static void TestLATIN1(void);
52
53#if !UCONFIG_NO_LEGACY_CONVERSION
54static void TestSBCS(void);
55static void TestDBCS(void);
56static void TestMBCS(void);
57#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
58static void TestICCRunout(void);
59#endif
60
61#ifdef U_ENABLE_GENERIC_ISO_2022
62static void TestISO_2022(void);
63#endif
64
65static void TestISO_2022_JP(void);
66static void TestISO_2022_JP_1(void);
67static void TestISO_2022_JP_2(void);
68static void TestISO_2022_KR(void);
69static void TestISO_2022_KR_1(void);
70static void TestISO_2022_CN(void);
71#if 0
72   /*
73    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
74    */
75static void TestISO_2022_CN_EXT(void);
76#endif
77static void TestJIS(void);
78static void TestHZ(void);
79#endif
80
81static void TestSCSU(void);
82
83#if !UCONFIG_NO_LEGACY_CONVERSION
84static void TestEBCDIC_STATEFUL(void);
85static void TestGB18030(void);
86static void TestLMBCS(void);
87static void TestJitterbug255(void);
88static void TestEBCDICUS4XML(void);
89#if 0
90   /*
91    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
92    */
93static void TestJitterbug915(void);
94#endif
95static void TestISCII(void);
96
97static void TestCoverageMBCS(void);
98static void TestJitterbug2346(void);
99static void TestJitterbug2411(void);
100static void TestJB5275(void);
101static void TestJB5275_1(void);
102static void TestJitterbug6175(void);
103
104static void TestIsFixedWidth(void);
105#endif
106
107static void TestInBufSizes(void);
108
109static void TestRoundTrippingAllUTF(void);
110static void TestConv(const uint16_t in[],
111                     int len,
112                     const char* conv,
113                     const char* lang,
114                     char byteArr[],
115                     int byteArrLen);
116
117/* open a converter, using test data if it begins with '@' */
118static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
119
120
/* Capacity, in elements, of the scratch output/offset arrays used by the
   conversion helpers; also the "unchunked" value of the two sizes below. */
#define NEW_MAX_BUFFER 999

/* Maximum number of input / output units handed to the converter per call.
   TestNewConvertWithBufferSizes() overwrites both before running its cases. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Label of the case being run; filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/* Smaller of x and y.  Arguments are parenthesized so that operands with
   low-precedence operators (|, &, ?:, ...) compare as a whole.  Each argument
   may still be evaluated twice, so do not pass expressions with side effects. */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
128
129static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
130{
131  if(cnv && cnv[0] == '@') {
132    return ucnv_openPackage(loadTestData(err), cnv+1, err);
133  } else {
134    return ucnv_open(cnv, err);
135  }
136}
137
/* Writes the first len bytes of a to the verbose log as "{0xNN 0xNN ... }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
146
147static void printUSeq(const UChar* a, int len)
148{
149    int i=0;
150    log_verbose("{U+");
151    while (i<len) log_verbose("0x%04x ", a[i++]);
152    log_verbose("}\n");
153}
154
/* Writes the first len bytes of a to stderr as "{0xNN 0xNN ... }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
163
164static void printUSeqErr(const UChar* a, int len)
165{
166    int i=0;
167    fprintf(stderr, "{U+");
168    while (i<len)
169        fprintf(stderr, "0x%04x ", a[i++]);
170    fprintf(stderr,"}\n");
171}
172
173static void
174TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
175{
176     const char* s0;
177     const char* s=(char*)source;
178     const int32_t *r=results;
179     UErrorCode errorCode=U_ZERO_ERROR;
180     UChar32 c;
181
182     while(s<limit) {
183        s0=s;
184        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
185        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
186            break; /* no more significant input */
187        } else if(U_FAILURE(errorCode)) {
188            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
189            break;
190        } else if(
191            /* test the expected number of input bytes only if >=0 */
192            (*r>=0 && (int32_t)(s-s0)!=*r) ||
193            c!=*(r+1)
194        ) {
195            log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
196                message, c, (s-s0), *(r+1), *r);
197            break;
198        }
199        r+=2;
200    }
201}
202
203static void
204TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
205{
206     const char* s=(char*)source;
207     UErrorCode errorCode=U_ZERO_ERROR;
208     uint32_t c;
209     c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
210     if(errorCode != expected){
211        log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
212     }
213     if(c != 0xFFFD && c != 0xffff){
214        log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
215     }
216
217}
218
219static void TestInBufSizes(void)
220{
221  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
222#if 1
223  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
224  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
225  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
226  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
227  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
228  TestNewConvertWithBufferSizes(1,1);
229  TestNewConvertWithBufferSizes(2,3);
230  TestNewConvertWithBufferSizes(3,2);
231#endif
232}
233
234static void TestOutBufSizes(void)
235{
236#if 1
237  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
238  TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
239  TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
240  TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
241  TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
242  TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
243
244#endif
245}
246
247
248void addTestNewConvert(TestNode** root)
249{
250#if !UCONFIG_NO_FILE_IO
251   addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
252   addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
253#endif
254   addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
255   addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
256   addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
257   addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
258   addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
259   addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
260
261   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
262   addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
263   addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
264   addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
265   addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
266   addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
267   addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
268   addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
269
270#if !UCONFIG_NO_LEGACY_CONVERSION
271   addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
272#endif
273
274   addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
275
276#if !UCONFIG_NO_LEGACY_CONVERSION
277   addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
278#if !UCONFIG_NO_FILE_IO
279   addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
280   addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
281#endif
282   addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
283
284#ifdef U_ENABLE_GENERIC_ISO_2022
285   addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
286#endif
287
288   addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
289   addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
290   addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
291   // android-changed (no have ISO_2022_JP_2) -- addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
292   addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
293   addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
294   // android-changed (no ISO-2022-CN) -- addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
295   /*
296    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
297   addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
298   addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
299    */
300   addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
301#endif
302
303   addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
304
305#if !UCONFIG_NO_LEGACY_CONVERSION
306   addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
307   addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
308   addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
309   addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
310   addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
311   addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
312   addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
313#if !UCONFIG_NO_COLLATION
314   // android-removed (no collation tailoring rules)  -- addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
315#endif
316
317   addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
318#endif
319
320
321#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
322   addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
323#endif
324
325   addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
326
327#if !UCONFIG_NO_LEGACY_CONVERSION
328   addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
329   addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
330   // android-removed (no full ISO2022 CJK tables)  -- addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
331   addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
332#endif
333}
334
335
336/* Note that this test already makes use of statics, so it's not really
337   multithread safe.
338   This convenience function lets us make the error messages actually useful.
339*/
340
341static void setNuConvTestName(const char *codepage, const char *direction)
342{
343    sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
344        codepage,
345        direction,
346        (int)gInBufferSize,
347        (int)gOutBufferSize);
348}
349
/* Outcome of one testConvertFromU() / testConvertToU() run. */
typedef enum
{
  /* the conversion produced the expected result */
  TC_OK       = 0,
  /* the result differed from the expectation; an error was printed */
  TC_MISMATCH = 1,
  /* the test itself failed; the error was already printed, do not print another */
  TC_FAIL     = 2
} ETestConvertResult;
356
357/* Note: This function uses global variables and it will not do offset
358checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
359static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
360                const char *codepage, const int32_t *expectOffsets , UBool useFallback)
361{
362    UErrorCode status = U_ZERO_ERROR;
363    UConverter *conv = 0;
364    char    junkout[NEW_MAX_BUFFER]; /* FIX */
365    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
366    char *p;
367    const UChar *src;
368    char *end;
369    char *targ;
370    int32_t *offs;
371    int i;
372    int32_t   realBufferSize;
373    char *realBufferEnd;
374    const UChar *realSourceEnd;
375    const UChar *sourceLimit;
376    UBool checkOffsets = TRUE;
377    UBool doFlush;
378
379    for(i=0;i<NEW_MAX_BUFFER;i++)
380        junkout[i] = (char)0xF0;
381    for(i=0;i<NEW_MAX_BUFFER;i++)
382        junokout[i] = 0xFF;
383
384    setNuConvTestName(codepage, "FROM");
385
386    log_verbose("\n=========  %s\n", gNuConvTestName);
387
388    conv = my_ucnv_open(codepage, &status);
389
390    if(U_FAILURE(status))
391    {
392        log_data_err("Couldn't open converter %s\n",codepage);
393        return TC_FAIL;
394    }
395    if(useFallback){
396        ucnv_setFallback(conv,useFallback);
397    }
398
399    log_verbose("Converter opened..\n");
400
401    src = source;
402    targ = junkout;
403    offs = junokout;
404
405    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
406    realBufferEnd = junkout + realBufferSize;
407    realSourceEnd = source + sourceLen;
408
409    if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
410        checkOffsets = FALSE;
411
412    do
413    {
414      end = nct_min(targ + gOutBufferSize, realBufferEnd);
415      sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
416
417      doFlush = (UBool)(sourceLimit == realSourceEnd);
418
419      if(targ == realBufferEnd) {
420        log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
421        return TC_FAIL;
422      }
423      log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
424
425
426      status = U_ZERO_ERROR;
427
428      ucnv_fromUnicode (conv,
429                        &targ,
430                        end,
431                        &src,
432                        sourceLimit,
433                        checkOffsets ? offs : NULL,
434                        doFlush, /* flush if we're at the end of the input data */
435                        &status);
436    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
437
438    if(U_FAILURE(status)) {
439      log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
440      return TC_FAIL;
441    }
442
443    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
444                sourceLen, targ-junkout);
445
446    if(getTestOption(VERBOSITY_OPTION))
447    {
448      char junk[9999];
449      char offset_str[9999];
450      char *ptr;
451
452      junk[0] = 0;
453      offset_str[0] = 0;
454      for(ptr = junkout;ptr<targ;ptr++) {
455        sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
456        sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
457      }
458
459      log_verbose(junk);
460      printSeq((const uint8_t *)expect, expectLen);
461      if ( checkOffsets ) {
462        log_verbose("\nOffsets:");
463        log_verbose(offset_str);
464      }
465      log_verbose("\n");
466    }
467    ucnv_close(conv);
468
469    if(expectLen != targ-junkout) {
470      log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
471      log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
472      fprintf(stderr, "Got:\n");
473      printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
474      fprintf(stderr, "Expected:\n");
475      printSeqErr((const unsigned char*)expect, expectLen);
476      return TC_MISMATCH;
477    }
478
479    if (checkOffsets && (expectOffsets != 0) ) {
480      log_verbose("comparing %d offsets..\n", targ-junkout);
481      if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
482        log_err("did not get the expected offsets. %s\n", gNuConvTestName);
483        printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
484        log_err("\n");
485        log_err("Got  :     ");
486        for(p=junkout;p<targ;p++) {
487          log_err("%d,", junokout[p-junkout]);
488        }
489        log_err("\n");
490        log_err("Expected:  ");
491        for(i=0; i<(targ-junkout); i++) {
492          log_err("%d,", expectOffsets[i]);
493        }
494        log_err("\n");
495      }
496    }
497
498    log_verbose("comparing..\n");
499    if(!memcmp(junkout, expect, expectLen)) {
500      log_verbose("Matches!\n");
501      return TC_OK;
502    } else {
503      log_err("String does not match u->%s\n", gNuConvTestName);
504      printUSeqErr(source, sourceLen);
505      fprintf(stderr, "Got:\n");
506      printSeqErr((const unsigned char *)junkout, expectLen);
507      fprintf(stderr, "Expected:\n");
508      printSeqErr((const unsigned char *)expect, expectLen);
509
510      return TC_MISMATCH;
511    }
512}
513
514/* Note: This function uses global variables and it will not do offset
515checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
516static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
517                                          const char *codepage, const int32_t *expectOffsets, UBool useFallback)
518{
519    UErrorCode status = U_ZERO_ERROR;
520    UConverter *conv = 0;
521    UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
522    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
523    const char *src;
524    const char *realSourceEnd;
525    const char *srcLimit;
526    UChar *p;
527    UChar *targ;
528    UChar *end;
529    int32_t *offs;
530    int i;
531    UBool   checkOffsets = TRUE;
532
533    int32_t   realBufferSize;
534    UChar *realBufferEnd;
535
536
537    for(i=0;i<NEW_MAX_BUFFER;i++)
538        junkout[i] = 0xFFFE;
539
540    for(i=0;i<NEW_MAX_BUFFER;i++)
541        junokout[i] = -1;
542
543    setNuConvTestName(codepage, "TO");
544
545    log_verbose("\n=========  %s\n", gNuConvTestName);
546
547    conv = my_ucnv_open(codepage, &status);
548
549    if(U_FAILURE(status))
550    {
551        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
552        return TC_FAIL;
553    }
554    if(useFallback){
555        ucnv_setFallback(conv,useFallback);
556    }
557    log_verbose("Converter opened..\n");
558
559    src = (const char *)source;
560    targ = junkout;
561    offs = junokout;
562
563    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
564    realBufferEnd = junkout + realBufferSize;
565    realSourceEnd = src + sourcelen;
566
567    if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
568        checkOffsets = FALSE;
569
570    do
571    {
572        end = nct_min( targ + gOutBufferSize, realBufferEnd);
573        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
574
575        if(targ == realBufferEnd)
576        {
577            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
578            return TC_FAIL;
579        }
580        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
581
582        /* oldTarg = targ; */
583
584        status = U_ZERO_ERROR;
585
586        ucnv_toUnicode (conv,
587                &targ,
588                end,
589                &src,
590                srcLimit,
591                checkOffsets ? offs : NULL,
592                (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
593                &status);
594
595        /*        offs += (targ-oldTarg); */
596
597      } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
598
599    if(U_FAILURE(status))
600    {
601        log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
602        return TC_FAIL;
603    }
604
605    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
606        sourcelen, targ-junkout);
607    if(getTestOption(VERBOSITY_OPTION))
608    {
609        char junk[9999];
610        char offset_str[9999];
611        UChar *ptr;
612
613        junk[0] = 0;
614        offset_str[0] = 0;
615
616        for(ptr = junkout;ptr<targ;ptr++)
617        {
618            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
619            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
620        }
621
622        log_verbose(junk);
623        printUSeq(expect, expectlen);
624        if ( checkOffsets )
625          {
626            log_verbose("\nOffsets:");
627            log_verbose(offset_str);
628          }
629        log_verbose("\n");
630    }
631    ucnv_close(conv);
632
633    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
634
635    if (checkOffsets && (expectOffsets != 0))
636    {
637        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
638            log_err("did not get the expected offsets. %s\n",gNuConvTestName);
639            log_err("Got:      ");
640            for(p=junkout;p<targ;p++) {
641                log_err("%d,", junokout[p-junkout]);
642            }
643            log_err("\n");
644            log_err("Expected: ");
645            for(i=0; i<(targ-junkout); i++) {
646                log_err("%d,", expectOffsets[i]);
647            }
648            log_err("\n");
649            log_err("output:   ");
650            for(i=0; i<(targ-junkout); i++) {
651                log_err("%X,", junkout[i]);
652            }
653            log_err("\n");
654            log_err("input:    ");
655            for(i=0; i<(src-(const char *)source); i++) {
656                log_err("%X,", (unsigned char)source[i]);
657            }
658            log_err("\n");
659        }
660    }
661
662    if(!memcmp(junkout, expect, expectlen*2))
663    {
664        log_verbose("Matches!\n");
665        return TC_OK;
666    }
667    else
668    {
669        log_err("String does not match. %s\n", gNuConvTestName);
670        log_verbose("String does not match. %s\n", gNuConvTestName);
671        printf("\nGot:");
672        printUSeqErr(junkout, expectlen);
673        printf("\nExpected:");
674        printUSeqErr(expect, expectlen);
675        return TC_MISMATCH;
676    }
677}
678
679
680static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
681{
682/** test chars #1 */
683    /*  1 2 3  1Han 2Han 3Han .  */
684    static const UChar   sampleText[] =
685     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
686    static const UChar sampleTextRoundTripUnmappable[] =
687    { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
688
689
690    static const uint8_t expectedUTF8[] =
691     { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
692    static const int32_t toUTF8Offs[] =
693     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
694    static const int32_t fmUTF8Offs[] =
695     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
696
697#ifdef U_ENABLE_GENERIC_ISO_2022
698    /* Same as UTF8, but with ^[%B preceeding */
699    static const const uint8_t expectedISO2022[] =
700     { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
701    static const int32_t toISO2022Offs[]     =
702     { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
703       0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
704    static const int32_t fmISO2022Offs[] =
705     { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
706#endif
707
708    /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
709    static const uint8_t expectedIBM930[] =
710     { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
711    static const int32_t toIBM930Offs[] =
712     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
713    static const int32_t fmIBM930Offs[] =
714     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
715
716    /* 1 2 3 0 h1 h2 h3 . MBCS*/
717    static const uint8_t expectedIBM943[] =
718     {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
719    static const int32_t toIBM943Offs    [] =
720     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
721    static const int32_t fmIBM943Offs[] =
722     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
723
724    /* 1 2 3 0 h1 h2 h3 . DBCS*/
725    static const uint8_t expectedIBM9027[] =
726     {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
727    static const int32_t toIBM9027Offs    [] =
728     {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
729
730     /* 1 2 3 0 <?> <?> <?> . SBCS*/
731    static const uint8_t expectedIBM920[] =
732     {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
733    static const int32_t toIBM920Offs    [] =
734     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
735
736    /* 1 2 3 0 <?> <?> <?> . SBCS*/
737    static const uint8_t expectedISO88593[] =
738     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
739    static const int32_t toISO88593Offs[]     =
740     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
741
742    /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
743    static const uint8_t expectedLATIN1[] =
744     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
745    static const int32_t toLATIN1Offs[]     =
746     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
747
748
749    /*  etc */
750    static const uint8_t expectedUTF16BE[] =
751     { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
752    static const int32_t toUTF16BEOffs[]=
753     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
754    static const int32_t fmUTF16BEOffs[] =
755     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
756
757    static const uint8_t expectedUTF16LE[] =
758     { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
759    static const int32_t toUTF16LEOffs[]=
760     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
761    static const int32_t fmUTF16LEOffs[] =
762     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
763
764    static const uint8_t expectedUTF32BE[] =
765     { 0x00, 0x00, 0x00, 0x31,
766       0x00, 0x00, 0x00, 0x32,
767       0x00, 0x00, 0x00, 0x33,
768       0x00, 0x00, 0x00, 0x00,
769       0x00, 0x00, 0x4e, 0x00,
770       0x00, 0x00, 0x4e, 0x8c,
771       0x00, 0x00, 0x4e, 0x09,
772       0x00, 0x00, 0x00, 0x2e,
773       0x00, 0x02, 0x00, 0x21 };
774    static const int32_t toUTF32BEOffs[]=
775     { 0x00, 0x00, 0x00, 0x00,
776       0x01, 0x01, 0x01, 0x01,
777       0x02, 0x02, 0x02, 0x02,
778       0x03, 0x03, 0x03, 0x03,
779       0x04, 0x04, 0x04, 0x04,
780       0x05, 0x05, 0x05, 0x05,
781       0x06, 0x06, 0x06, 0x06,
782       0x07, 0x07, 0x07, 0x07,
783       0x08, 0x08, 0x08, 0x08,
784       0x08, 0x08, 0x08, 0x08 };
785    static const int32_t fmUTF32BEOffs[] =
786     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
787
788    static const uint8_t expectedUTF32LE[] =
789     { 0x31, 0x00, 0x00, 0x00,
790       0x32, 0x00, 0x00, 0x00,
791       0x33, 0x00, 0x00, 0x00,
792       0x00, 0x00, 0x00, 0x00,
793       0x00, 0x4e, 0x00, 0x00,
794       0x8c, 0x4e, 0x00, 0x00,
795       0x09, 0x4e, 0x00, 0x00,
796       0x2e, 0x00, 0x00, 0x00,
797       0x21, 0x00, 0x02, 0x00 };
798    static const int32_t toUTF32LEOffs[]=
799     { 0x00, 0x00, 0x00, 0x00,
800       0x01, 0x01, 0x01, 0x01,
801       0x02, 0x02, 0x02, 0x02,
802       0x03, 0x03, 0x03, 0x03,
803       0x04, 0x04, 0x04, 0x04,
804       0x05, 0x05, 0x05, 0x05,
805       0x06, 0x06, 0x06, 0x06,
806       0x07, 0x07, 0x07, 0x07,
807       0x08, 0x08, 0x08, 0x08,
808       0x08, 0x08, 0x08, 0x08 };
809    static const int32_t fmUTF32LEOffs[] =
810     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
811
812
813
814
815/** Test chars #2 **/
816
817    /* Sahha [health],  slashed h's */
818    static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
819    static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
820
821    /* LMBCS */
822    static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
823    static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
824    static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
825    static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
826    /*********************************** START OF CODE finally *************/
827
828    gInBufferSize = insize;
829    gOutBufferSize = outsize;
830
831    log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
832
833
834    /*UTF-8*/
835    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
836        expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
837
838    log_verbose("Test surrogate behaviour for UTF8\n");
839    {
840        static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
841        static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
842                           0xf0, 0x90, 0x90, 0x81,
843                           0xef, 0xbf, 0xbd
844        };
845        static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
846        testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
847                         expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
848
849
850    }
851
852#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
853    /*ISO-2022*/
854    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
855        expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
856#endif
857
858    /*UTF16 LE*/
859    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
860        expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
861    /*UTF16 BE*/
862    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
863        expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
864    /*UTF32 LE*/
865    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
866        expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
867    /*UTF32 BE*/
868    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
869        expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
870
871    /*LATIN_1*/
872    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
873        expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
874
875#if !UCONFIG_NO_LEGACY_CONVERSION
876    /*EBCDIC_STATEFUL*/
877    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
878        expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
879
880    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
881        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
882
883    /*MBCS*/
884
885    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
886        expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
887    /*DBCS*/
888    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
889        expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
890    /*SBCS*/
891    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
892        expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
893    /*SBCS*/
894    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
895        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
896#endif
897
898
899/****/
900
901    /*UTF-8*/
902    testConvertToU(expectedUTF8, sizeof(expectedUTF8),
903        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
904#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
905    /*ISO-2022*/
906    testConvertToU(expectedISO2022, sizeof(expectedISO2022),
907        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
908#endif
909
910    /*UTF16 LE*/
911    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
912        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
913    /*UTF16 BE*/
914    testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
915        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
916    /*UTF32 LE*/
917    testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
918        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
919    /*UTF32 BE*/
920    testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
921        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
922
923#if !UCONFIG_NO_LEGACY_CONVERSION
924    /*EBCDIC_STATEFUL*/
925    testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
926            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE);
927    /*MBCS*/
928    testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
929            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE);
930#endif
931
932    /* Try it again to make sure it still works */
933    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
934        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
935
936#if !UCONFIG_NO_LEGACY_CONVERSION
937    testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
938        malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
939
940    testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
941        expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
942
943    /*LMBCS*/
944    testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
945        expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
946    testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
947        LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
948#endif
949
950    /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
951    {
952        /* encode directly set D and set O */
953        static const uint8_t utf7[] = {
954            /*
955                Hi Mom -+Jjo--!
956                A+ImIDkQ.
957                +-
958                +ZeVnLIqe-
959            */
960            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
961            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
962            0x2b, 0x2d,
963            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
964        };
965        static const UChar unicode[] = {
966            /*
967                Hi Mom -<WHITE SMILING FACE>-!
968                A<NOT IDENTICAL TO><ALPHA>.
969                +
970                [Japanese word "nihongo"]
971            */
972            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
973            0x41, 0x2262, 0x0391, 0x2e,
974            0x2b,
975            0x65e5, 0x672c, 0x8a9e
976        };
977        static const int32_t toUnicodeOffsets[] = {
978            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
979            15, 17, 19, 23,
980            24,
981            27, 29, 32
982        };
983        static const int32_t fromUnicodeOffsets[] = {
984            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
985            11, 12, 12, 12, 13, 13, 13, 13, 14,
986            15, 15,
987            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
988        };
989
990        /* same but escaping set O (the exclamation mark) */
991        static const uint8_t utf7Restricted[] = {
992            /*
993                Hi Mom -+Jjo--+ACE-
994                A+ImIDkQ.
995                +-
996                +ZeVnLIqe-
997            */
998            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
999            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1000            0x2b, 0x2d,
1001            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1002        };
1003        static const int32_t toUnicodeOffsetsR[] = {
1004            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1005            19, 21, 23, 27,
1006            28,
1007            31, 33, 36
1008        };
1009        static const int32_t fromUnicodeOffsetsR[] = {
1010            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1011            11, 12, 12, 12, 13, 13, 13, 13, 14,
1012            15, 15,
1013            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1014        };
1015
1016        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1017
1018        testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
1019
1020        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1021
1022        testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1023    }
1024
1025    /*
1026     * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1027     * modified according to RFC 2060,
1028     * and supplemented with the one example in RFC 2060 itself.
1029     */
1030    {
1031        static const uint8_t imap[] = {
1032            /*  Hi Mom -&Jjo--!
1033                A&ImIDkQ-.
1034                &-
1035                &ZeVnLIqe-
1036                \
1037                ~peter
1038                /mail
1039                /&ZeVnLIqe-
1040                /&U,BTFw-
1041            */
1042            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1043            0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1044            0x26, 0x2d,
1045            0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1046            0x5c,
1047            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1048            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1049            0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1050            0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1051        };
1052        static const UChar unicode[] = {
1053            /*  Hi Mom -<WHITE SMILING FACE>-!
1054                A<NOT IDENTICAL TO><ALPHA>.
1055                &
1056                [Japanese word "nihongo"]
1057                \
1058                ~peter
1059                /mail
1060                /<65e5, 672c, 8a9e>
1061                /<53f0, 5317>
1062            */
1063            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1064            0x41, 0x2262, 0x0391, 0x2e,
1065            0x26,
1066            0x65e5, 0x672c, 0x8a9e,
1067            0x5c,
1068            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1069            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1070            0x2f, 0x65e5, 0x672c, 0x8a9e,
1071            0x2f, 0x53f0, 0x5317
1072        };
1073        static const int32_t toUnicodeOffsets[] = {
1074            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1075            15, 17, 19, 24,
1076            25,
1077            28, 30, 33,
1078            37,
1079            38, 39, 40, 41, 42, 43,
1080            44, 45, 46, 47, 48,
1081            49, 51, 53, 56,
1082            60, 62, 64
1083        };
1084        static const int32_t fromUnicodeOffsets[] = {
1085            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1086            11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1087            15, 15,
1088            16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1089            19,
1090            20, 21, 22, 23, 24, 25,
1091            26, 27, 28, 29, 30,
1092            31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1093            35, 36, 36, 36, 37, 37, 37, 37, 37
1094        };
1095
1096        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1097
1098        testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1099    }
1100
1101    /* Test UTF-8 bad data handling*/
1102    {
1103        static const uint8_t utf8[]={
1104            0x61,
1105            0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1106            0x00,
1107            0x62,
1108            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1109            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1110            0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1111            0xdf, 0xbf,                     /* 7ff */
1112            0xbf,                           /* truncated tail */
1113            0xf4, 0x90, 0x80, 0x80,         /* 11FFFF */
1114            0x02
1115        };
1116
1117        static const uint16_t utf8Expected[]={
1118            0x0061,
1119            0xfffd,
1120            0x0000,
1121            0x0062,
1122            0xfffd,
1123            0xfffd,
1124            0xdbff, 0xdfff,
1125            0x07ff,
1126            0xfffd,
1127            0xfffd,
1128            0x0002
1129        };
1130
1131        static const int32_t utf8Offsets[]={
1132            0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1133        };
1134        testConvertToU(utf8, sizeof(utf8),
1135                       utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1136
1137    }
1138
1139    /* Test UTF-32BE bad data handling*/
1140    {
1141        static const uint8_t utf32[]={
1142            0x00, 0x00, 0x00, 0x61,
1143            0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1144            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1145            0x00, 0x00, 0x00, 0x62,
1146            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1147            0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1148            0x00, 0x00, 0x01, 0x62,
1149            0x00, 0x00, 0x02, 0x62
1150        };
1151        static const uint16_t utf32Expected[]={
1152            0x0061,
1153            0xfffd,         /* 0x110000 out of range */
1154            0xDBFF,         /* 0x10FFFF in range */
1155            0xDFFF,
1156            0x0062,
1157            0xfffd,         /* 0xffffffff out of range */
1158            0xfffd,         /* 0x7fffffff out of range */
1159            0x0162,
1160            0x0262
1161        };
1162        static const int32_t utf32Offsets[]={
1163            0, 4, 8, 8, 12, 16, 20, 24, 28
1164        };
1165        static const uint8_t utf32ExpectedBack[]={
1166            0x00, 0x00, 0x00, 0x61,
1167            0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1168            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1169            0x00, 0x00, 0x00, 0x62,
1170            0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1171            0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1172            0x00, 0x00, 0x01, 0x62,
1173            0x00, 0x00, 0x02, 0x62
1174        };
1175        static const int32_t utf32OffsetsBack[]={
1176            0,0,0,0,
1177            1,1,1,1,
1178            2,2,2,2,
1179            4,4,4,4,
1180            5,5,5,5,
1181            6,6,6,6,
1182            7,7,7,7,
1183            8,8,8,8
1184        };
1185
1186        testConvertToU(utf32, sizeof(utf32),
1187                       utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1188        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1189            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1190    }
1191
1192    /* Test UTF-32LE bad data handling*/
1193    {
1194        static const uint8_t utf32[]={
1195            0x61, 0x00, 0x00, 0x00,
1196            0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1197            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1198            0x62, 0x00, 0x00, 0x00,
1199            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1200            0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1201            0x62, 0x01, 0x00, 0x00,
1202            0x62, 0x02, 0x00, 0x00,
1203        };
1204
1205        static const uint16_t utf32Expected[]={
1206            0x0061,
1207            0xfffd,         /* 0x110000 out of range */
1208            0xDBFF,         /* 0x10FFFF in range */
1209            0xDFFF,
1210            0x0062,
1211            0xfffd,         /* 0xffffffff out of range */
1212            0xfffd,         /* 0x7fffffff out of range */
1213            0x0162,
1214            0x0262
1215        };
1216        static const int32_t utf32Offsets[]={
1217            0, 4, 8, 8, 12, 16, 20, 24, 28
1218        };
1219        static const uint8_t utf32ExpectedBack[]={
1220            0x61, 0x00, 0x00, 0x00,
1221            0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1222            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1223            0x62, 0x00, 0x00, 0x00,
1224            0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1225            0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1226            0x62, 0x01, 0x00, 0x00,
1227            0x62, 0x02, 0x00, 0x00
1228        };
1229        static const int32_t utf32OffsetsBack[]={
1230            0,0,0,0,
1231            1,1,1,1,
1232            2,2,2,2,
1233            4,4,4,4,
1234            5,5,5,5,
1235            6,6,6,6,
1236            7,7,7,7,
1237            8,8,8,8
1238        };
1239        testConvertToU(utf32, sizeof(utf32),
1240            utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1241        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1242            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1243    }
1244}
1245
1246static void TestCoverageMBCS(){
1247#if 0
1248    UErrorCode status = U_ZERO_ERROR;
1249    const char *directory = loadTestData(&status);
1250    char* tdpath = NULL;
1251    char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1252    int len = strlen(directory);
1253    char* index=NULL;
1254
1255    tdpath = (char*) malloc(sizeof(char) * (len * 2));
1256    uprv_strcpy(saveDirectory,u_getDataDirectory());
1257    log_verbose("Retrieved data directory %s \n",saveDirectory);
1258    uprv_strcpy(tdpath,directory);
1259    index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1260
1261    if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1262            *(index+1)=0;
1263    }
1264    u_setDataDirectory(tdpath);
1265    log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1266#endif
1267
1268    /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1269      which is test file for MBCS conversion with single-byte codepage data.*/
1270    {
1271
1272        /* MBCS with single byte codepage data test1.ucm*/
1273        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1274        const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1275        int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1276
1277        /*from Unicode*/
1278        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1279            expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1280    }
1281
1282    /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1283      which is test file for MBCS conversion with three-byte codepage data.*/
1284    {
1285
1286        /* MBCS with three byte codepage data test3.ucm*/
1287        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1288        const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1289        int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1290
1291        const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1292        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1293        int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1294
1295        /*from Unicode*/
1296        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1297            expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1298
1299        /*to Unicode*/
1300        testConvertToU(test3input, sizeof(test3input),
1301            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1302
1303    }
1304
1305    /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1306      which is test file for MBCS conversion with four-byte codepage data.*/
1307    {
1308
1309        /* MBCS with three byte codepage data test4.ucm*/
1310        static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1311        static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1312        static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1313
1314        static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1315        static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1316        static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1317
1318        /*from Unicode*/
1319        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1320            expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1321
1322        /*to Unicode*/
1323        testConvertToU(test4input, sizeof(test4input),
1324            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1325
1326    }
1327#if 0
1328    free(tdpath);
1329    /* restore the original data directory */
1330    log_verbose("Setting the data directory to %s \n", saveDirectory);
1331    u_setDataDirectory(saveDirectory);
1332    free(saveDirectory);
1333#endif
1334
1335}
1336
1337static void TestConverterType(const char *convName, UConverterType convType) {
1338    UConverter* myConverter;
1339    UErrorCode err = U_ZERO_ERROR;
1340
1341    myConverter = my_ucnv_open(convName, &err);
1342
1343    if (U_FAILURE(err)) {
1344        log_data_err("Failed to create an %s converter\n", convName);
1345        return;
1346    }
1347    else
1348    {
1349        if (ucnv_getType(myConverter)!=convType) {
1350            log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1351                convName, convType);
1352        }
1353        else {
1354            log_verbose("ucnv_getType %s ok\n", convName);
1355        }
1356    }
1357    ucnv_close(myConverter);
1358}
1359
1360static void TestConverterTypesAndStarters()
1361{
1362#if !UCONFIG_NO_LEGACY_CONVERSION
1363    UConverter* myConverter;
1364    UErrorCode err = U_ZERO_ERROR;
1365    UBool mystarters[256];
1366
1367/*    const UBool expectedKSCstarters[256] = {
1368        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1369        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1370        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1371        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1372        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1373        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1374        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1375        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1376        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1377        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1378        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1379        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1380        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1381        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382        FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1383        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1384        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1385        TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1386        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1387        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1388        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1389        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1390        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1391        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1392        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1393        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1394
1395
1396    log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1397
1398    myConverter = ucnv_open("ksc", &err);
1399    if (U_FAILURE(err)) {
1400      log_data_err("Failed to create an ibm-ksc converter\n");
1401      return;
1402    }
1403    else
1404    {
1405        if (ucnv_getType(myConverter)!=UCNV_MBCS)
1406            log_err("ucnv_getType Failed for ibm-949\n");
1407        else
1408            log_verbose("ucnv_getType ibm-949 ok\n");
1409
1410        if(myConverter!=NULL)
1411            ucnv_getStarters(myConverter, mystarters, &err);
1412
1413        /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1414          log_err("Failed ucnv_getStarters for ksc\n");
1415          else
1416          log_verbose("ucnv_getStarters ok\n");*/
1417
1418    }
1419    ucnv_close(myConverter);
1420
1421    TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1422    TestConverterType("ibm-878", UCNV_SBCS);
1423#endif
1424
1425    TestConverterType("iso-8859-1", UCNV_LATIN_1);
1426
1427    TestConverterType("ibm-1208", UCNV_UTF8);
1428
1429    TestConverterType("utf-8", UCNV_UTF8);
1430    TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1431    TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1432    TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1433    TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1434
1435#if !UCONFIG_NO_LEGACY_CONVERSION
1436
1437#if defined(U_ENABLE_GENERIC_ISO_2022)
1438    TestConverterType("iso-2022", UCNV_ISO_2022);
1439#endif
1440
1441    TestConverterType("hz", UCNV_HZ);
1442#endif
1443
1444    TestConverterType("scsu", UCNV_SCSU);
1445
1446#if !UCONFIG_NO_LEGACY_CONVERSION
1447    TestConverterType("x-iscii-de", UCNV_ISCII);
1448#endif
1449
1450    TestConverterType("ascii", UCNV_US_ASCII);
1451    TestConverterType("utf-7", UCNV_UTF7);
1452    TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1453    TestConverterType("bocu-1", UCNV_BOCU1);
1454}
1455
1456static void
1457TestAmbiguousConverter(UConverter *cnv) {
1458    static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1459    UChar outUnicode[20]={ 0, 0, 0, 0 };
1460
1461    const char *s;
1462    UChar *u;
1463    UErrorCode errorCode;
1464    UBool isAmbiguous;
1465
1466    /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1467    errorCode=U_ZERO_ERROR;
1468    s=inBytes;
1469    u=outUnicode;
1470    ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1471    if(U_FAILURE(errorCode)) {
1472        /* we do not care about general failures in this test; the input may just not be mappable */
1473        return;
1474    }
1475
1476    if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1477        /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1478        /* There are some encodings that are partially ASCII based,
1479        like the ISO-7 and GSM series of codepages, which we ignore. */
1480        return;
1481    }
1482
1483    isAmbiguous=ucnv_isAmbiguous(cnv);
1484
1485    /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1486    if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1487        log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1488            ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1489        return;
1490    }
1491
1492    if(outUnicode[2]!=0x5c) {
1493        /* needs fixup, fix it */
1494        ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1495        if(outUnicode[2]!=0x5c) {
1496            /* the fix failed */
1497            log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1498            return;
1499        }
1500    }
1501}
1502
1503static void TestAmbiguous()
1504{
1505    UErrorCode status = U_ZERO_ERROR;
1506    UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1507    static const char target[] = {
1508        /* "\\usr\\local\\share\\data\\icutest.txt" */
1509        0x5c, 0x75, 0x73, 0x72,
1510        0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1511        0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1512        0x5c, 0x64, 0x61, 0x74, 0x61,
1513        0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1514        0
1515    };
1516    UChar asciiResult[200], sjisResult[200];
1517    int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1518    const char *name;
1519
1520    /* enumerate all converters */
1521    status=U_ZERO_ERROR;
1522    for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1523        cnv=ucnv_open(name, &status);
1524        if(U_SUCCESS(status)) {
1525            /* BEGIN android-changed. To save space Android does not build full ISO-2022-CN CJK tables. */
1526            const char* cnvName = ucnv_getName(cnv, &status);
1527            if (strlen(cnvName) < 8 ||
1528                strncmp(cnvName, "ISO_2022_CN", 8) != 0) {
1529            TestAmbiguousConverter(cnv);
1530            }
1531            /* END android-changed */
1532            ucnv_close(cnv);
1533        } else {
1534            log_err("error: unable to open available converter \"%s\"\n", name);
1535            status=U_ZERO_ERROR;
1536        }
1537    }
1538
1539#if !UCONFIG_NO_LEGACY_CONVERSION
1540    sjis_cnv = ucnv_open("ibm-943", &status);
1541    if (U_FAILURE(status))
1542    {
1543        log_data_err("Failed to create a SJIS converter\n");
1544        return;
1545    }
1546    ascii_cnv = ucnv_open("LATIN-1", &status);
1547    if (U_FAILURE(status))
1548    {
1549        log_data_err("Failed to create a LATIN-1 converter\n");
1550        ucnv_close(sjis_cnv);
1551        return;
1552    }
1553    /* convert target from SJIS to Unicode */
1554    sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1555    if (U_FAILURE(status))
1556    {
1557        log_err("Failed to convert the SJIS string.\n");
1558        ucnv_close(sjis_cnv);
1559        ucnv_close(ascii_cnv);
1560        return;
1561    }
1562    /* convert target from Latin-1 to Unicode */
1563    /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1564    if (U_FAILURE(status))
1565    {
1566        log_err("Failed to convert the Latin-1 string.\n");
1567        ucnv_close(sjis_cnv);
1568        ucnv_close(ascii_cnv);
1569        return;
1570    }
1571    if (!ucnv_isAmbiguous(sjis_cnv))
1572    {
1573        log_err("SJIS converter should contain ambiguous character mappings.\n");
1574        ucnv_close(sjis_cnv);
1575        ucnv_close(ascii_cnv);
1576        return;
1577    }
1578    if (u_strcmp(sjisResult, asciiResult) == 0)
1579    {
1580        log_err("File separators for SJIS don't need to be fixed.\n");
1581    }
1582    ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1583    if (u_strcmp(sjisResult, asciiResult) != 0)
1584    {
1585        log_err("Fixing file separator for SJIS failed.\n");
1586    }
1587    ucnv_close(sjis_cnv);
1588    ucnv_close(ascii_cnv);
1589#endif
1590}
1591
1592static void
1593TestSignatureDetection(){
1594    /* with null terminated strings */
1595    {
1596        static const char* data[] = {
1597                "\xFE\xFF\x00\x00",     /* UTF-16BE */
1598                "\xFF\xFE\x00\x00",     /* UTF-16LE */
1599                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1600                "\x0E\xFE\xFF\x00",     /* SCSU     */
1601
1602                "\xFE\xFF",             /* UTF-16BE */
1603                "\xFF\xFE",             /* UTF-16LE */
1604                "\xEF\xBB\xBF",         /* UTF-8    */
1605                "\x0E\xFE\xFF",         /* SCSU     */
1606
1607                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1608                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1609                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1610                "\x0E\xFE\xFF\x41",     /* SCSU     */
1611
1612                "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1613                "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1614                "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1615                "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1616                "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1617
1618                "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1619        };
1620        static const char* expected[] = {
1621                "UTF-16BE",
1622                "UTF-16LE",
1623                "UTF-8",
1624                "SCSU",
1625
1626                "UTF-16BE",
1627                "UTF-16LE",
1628                "UTF-8",
1629                "SCSU",
1630
1631                "UTF-16BE",
1632                "UTF-16LE",
1633                "UTF-8",
1634                "SCSU",
1635
1636                "UTF-7",
1637                "UTF-7",
1638                "UTF-7",
1639                "UTF-7",
1640                "UTF-7",
1641                "UTF-EBCDIC"
1642        };
1643        static const int32_t expectedLength[] ={
1644            2,
1645            2,
1646            3,
1647            3,
1648
1649            2,
1650            2,
1651            3,
1652            3,
1653
1654            2,
1655            2,
1656            3,
1657            3,
1658
1659            5,
1660            4,
1661            4,
1662            4,
1663            4,
1664            4
1665        };
1666        int i=0;
1667        UErrorCode err;
1668        int32_t signatureLength = -1;
1669        const char* source = NULL;
1670        const char* enc = NULL;
1671        for( ; i<sizeof(data)/sizeof(char*); i++){
1672            err = U_ZERO_ERROR;
1673            source = data[i];
1674            enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1675            if(U_FAILURE(err)){
1676                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1677                continue;
1678            }
1679            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1680                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1681                continue;
1682            }
1683            if(signatureLength != expectedLength[i]){
1684                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1685            }
1686        }
1687    }
1688    {
1689        static const char* data[] = {
1690                "\xFE\xFF\x00",         /* UTF-16BE */
1691                "\xFF\xFE\x00",         /* UTF-16LE */
1692                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1693                "\x0E\xFE\xFF\x00",     /* SCSU     */
1694                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1695                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1696                "\xFE\xFF",             /* UTF-16BE */
1697                "\xFF\xFE",             /* UTF-16LE */
1698                "\xEF\xBB\xBF",         /* UTF-8    */
1699                "\x0E\xFE\xFF",         /* SCSU     */
1700                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1701                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1702                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1703                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1704                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1705                "\x0E\xFE\xFF\x41",     /* SCSU     */
1706                "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1707                "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1708                "\xFB\xEE\x28",         /* BOCU-1   */
1709                "\xFF\x41\x42"          /* NULL     */
1710        };
1711        static const int len[] = {
1712            3,
1713            3,
1714            4,
1715            4,
1716            4,
1717            4,
1718            2,
1719            2,
1720            3,
1721            3,
1722            4,
1723            4,
1724            4,
1725            4,
1726            4,
1727            4,
1728            5,
1729            5,
1730            3,
1731            3
1732        };
1733
1734        static const char* expected[] = {
1735                "UTF-16BE",
1736                "UTF-16LE",
1737                "UTF-8",
1738                "SCSU",
1739                "UTF-32BE",
1740                "UTF-32LE",
1741                "UTF-16BE",
1742                "UTF-16LE",
1743                "UTF-8",
1744                "SCSU",
1745                "UTF-32BE",
1746                "UTF-32LE",
1747                "UTF-16BE",
1748                "UTF-16LE",
1749                "UTF-8",
1750                "SCSU",
1751                "UTF-32BE",
1752                "UTF-32LE",
1753                "BOCU-1",
1754                NULL
1755        };
1756        static const int32_t expectedLength[] ={
1757            2,
1758            2,
1759            3,
1760            3,
1761            4,
1762            4,
1763            2,
1764            2,
1765            3,
1766            3,
1767            4,
1768            4,
1769            2,
1770            2,
1771            3,
1772            3,
1773            4,
1774            4,
1775            3,
1776            0
1777        };
1778        int i=0;
1779        UErrorCode err;
1780        int32_t signatureLength = -1;
1781        int32_t sourceLength=-1;
1782        const char* source = NULL;
1783        const char* enc = NULL;
1784        for( ; i<sizeof(data)/sizeof(char*); i++){
1785            err = U_ZERO_ERROR;
1786            source = data[i];
1787            sourceLength = len[i];
1788            enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1789            if(U_FAILURE(err)){
1790                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1791                continue;
1792            }
1793            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1794                if(expected[i] !=NULL){
1795                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1796                 continue;
1797                }
1798            }
1799            if(signatureLength != expectedLength[i]){
1800                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1801            }
1802        }
1803    }
1804}
1805
1806static void TestUTF7() {
1807    /* test input */
1808    static const uint8_t in[]={
1809        /* H - +Jjo- - ! +- +2AHcAQ */
1810        0x48,
1811        0x2d,
1812        0x2b, 0x4a, 0x6a, 0x6f,
1813        0x2d, 0x2d,
1814        0x21,
1815        0x2b, 0x2d,
1816        0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1817    };
1818
1819    /* expected test results */
1820    static const int32_t results[]={
1821        /* number of bytes read, code point */
1822        1, 0x48,
1823        1, 0x2d,
1824        4, 0x263a, /* <WHITE SMILING FACE> */
1825        2, 0x2d,
1826        1, 0x21,
1827        2, 0x2b,
1828        7, 0x10401
1829    };
1830
1831    const char *cnvName;
1832    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1833    UErrorCode errorCode=U_ZERO_ERROR;
1834    UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1835    if(U_FAILURE(errorCode)) {
1836        log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1837        return;
1838    }
1839    TestNextUChar(cnv, source, limit, results, "UTF-7");
1840    /* Test the condition when source >= sourceLimit */
1841    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1842    cnvName = ucnv_getName(cnv, &errorCode);
1843    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1844        log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1845    }
1846    ucnv_close(cnv);
1847}
1848
1849static void TestIMAP() {
1850    /* test input */
1851    static const uint8_t in[]={
1852        /* H - &Jjo- - ! &- &2AHcAQ- \ */
1853        0x48,
1854        0x2d,
1855        0x26, 0x4a, 0x6a, 0x6f,
1856        0x2d, 0x2d,
1857        0x21,
1858        0x26, 0x2d,
1859        0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1860    };
1861
1862    /* expected test results */
1863    static const int32_t results[]={
1864        /* number of bytes read, code point */
1865        1, 0x48,
1866        1, 0x2d,
1867        4, 0x263a, /* <WHITE SMILING FACE> */
1868        2, 0x2d,
1869        1, 0x21,
1870        2, 0x26,
1871        7, 0x10401
1872    };
1873
1874    const char *cnvName;
1875    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1876    UErrorCode errorCode=U_ZERO_ERROR;
1877    UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1878    if(U_FAILURE(errorCode)) {
1879        log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1880        return;
1881    }
1882    TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1883    /* Test the condition when source >= sourceLimit */
1884    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1885    cnvName = ucnv_getName(cnv, &errorCode);
1886    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1887        log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1888    }
1889    ucnv_close(cnv);
1890}
1891
1892static void TestUTF8() {
1893    /* test input */
1894    static const uint8_t in[]={
1895        0x61,
1896        0xc2, 0x80,
1897        0xe0, 0xa0, 0x80,
1898        0xf0, 0x90, 0x80, 0x80,
1899        0xf4, 0x84, 0x8c, 0xa1,
1900        0xf0, 0x90, 0x90, 0x81
1901    };
1902
1903    /* expected test results */
1904    static const int32_t results[]={
1905        /* number of bytes read, code point */
1906        1, 0x61,
1907        2, 0x80,
1908        3, 0x800,
1909        4, 0x10000,
1910        4, 0x104321,
1911        4, 0x10401
1912    };
1913
1914    /* error test input */
1915    static const uint8_t in2[]={
1916        0x61,
1917        0xc0, 0x80,                     /* illegal non-shortest form */
1918        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1919        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1920        0xc0, 0xc0,                     /* illegal trail byte */
1921        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1922        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1923        0xfe,                           /* illegal byte altogether */
1924        0x62
1925    };
1926
1927    /* expected error test results */
1928    static const int32_t results2[]={
1929        /* number of bytes read, code point */
1930        1, 0x61,
1931        22, 0x62
1932    };
1933
1934    UConverterToUCallback cb;
1935    const void *p;
1936
1937    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1938    UErrorCode errorCode=U_ZERO_ERROR;
1939    UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1940    if(U_FAILURE(errorCode)) {
1941        log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1942        return;
1943    }
1944    TestNextUChar(cnv, source, limit, results, "UTF-8");
1945    /* Test the condition when source >= sourceLimit */
1946    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1947
1948    /* test error behavior with a skip callback */
1949    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1950    source=(const char *)in2;
1951    limit=(const char *)(in2+sizeof(in2));
1952    TestNextUChar(cnv, source, limit, results2, "UTF-8");
1953
1954    ucnv_close(cnv);
1955}
1956
1957static void TestCESU8() {
1958    /* test input */
1959    static const uint8_t in[]={
1960        0x61,
1961        0xc2, 0x80,
1962        0xe0, 0xa0, 0x80,
1963        0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1964        0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1965        0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1966        0xef, 0xbf, 0xbc
1967    };
1968
1969    /* expected test results */
1970    static const int32_t results[]={
1971        /* number of bytes read, code point */
1972        1, 0x61,
1973        2, 0x80,
1974        3, 0x800,
1975        6, 0x10000,
1976        3, 0xdc01,
1977        -1,0xd802,  /* may read 3 or 6 bytes */
1978        -1,0x10ffff,/* may read 0 or 3 bytes */
1979        3, 0xfffc
1980    };
1981
1982    /* error test input */
1983    static const uint8_t in2[]={
1984        0x61,
1985        0xc0, 0x80,                     /* illegal non-shortest form */
1986        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1987        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1988        0xc0, 0xc0,                     /* illegal trail byte */
1989        0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
1990        0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
1991        0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
1992        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1993        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1994        0xfe,                           /* illegal byte altogether */
1995        0x62
1996    };
1997
1998    /* expected error test results */
1999    static const int32_t results2[]={
2000        /* number of bytes read, code point */
2001        1, 0x61,
2002        34, 0x62
2003    };
2004
2005    UConverterToUCallback cb;
2006    const void *p;
2007
2008    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2009    UErrorCode errorCode=U_ZERO_ERROR;
2010    UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2011    if(U_FAILURE(errorCode)) {
2012        log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2013        return;
2014    }
2015    TestNextUChar(cnv, source, limit, results, "CESU-8");
2016    /* Test the condition when source >= sourceLimit */
2017    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2018
2019    /* test error behavior with a skip callback */
2020    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2021    source=(const char *)in2;
2022    limit=(const char *)(in2+sizeof(in2));
2023    TestNextUChar(cnv, source, limit, results2, "CESU-8");
2024
2025    ucnv_close(cnv);
2026}
2027
2028static void TestUTF16() {
2029    /* test input */
2030    static const uint8_t in1[]={
2031        0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2032    };
2033    static const uint8_t in2[]={
2034        0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2035    };
2036    static const uint8_t in3[]={
2037        0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2038    };
2039
2040    /* expected test results */
2041    static const int32_t results1[]={
2042        /* number of bytes read, code point */
2043        4, 0x4e00,
2044        2, 0xfeff
2045    };
2046    static const int32_t results2[]={
2047        /* number of bytes read, code point */
2048        4, 0x004e,
2049        2, 0xfffe
2050    };
2051    static const int32_t results3[]={
2052        /* number of bytes read, code point */
2053        2, 0xfefe,
2054        2, 0x4e00,
2055        2, 0xfeff,
2056        4, 0x20001
2057    };
2058
2059    const char *source, *limit;
2060
2061    UErrorCode errorCode=U_ZERO_ERROR;
2062    UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2063    if(U_FAILURE(errorCode)) {
2064        log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2065        return;
2066    }
2067
2068    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2069    TestNextUChar(cnv, source, limit, results1, "UTF-16");
2070
2071    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2072    ucnv_resetToUnicode(cnv);
2073    TestNextUChar(cnv, source, limit, results2, "UTF-16");
2074
2075    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2076    ucnv_resetToUnicode(cnv);
2077    TestNextUChar(cnv, source, limit, results3, "UTF-16");
2078
2079    /* Test the condition when source >= sourceLimit */
2080    ucnv_resetToUnicode(cnv);
2081    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2082
2083    ucnv_close(cnv);
2084}
2085
2086static void TestUTF16BE() {
2087    /* test input */
2088    static const uint8_t in[]={
2089        0x00, 0x61,
2090        0x00, 0xc0,
2091        0x00, 0x31,
2092        0x00, 0xf4,
2093        0xce, 0xfe,
2094        0xd8, 0x01, 0xdc, 0x01
2095    };
2096
2097    /* expected test results */
2098    static const int32_t results[]={
2099        /* number of bytes read, code point */
2100        2, 0x61,
2101        2, 0xc0,
2102        2, 0x31,
2103        2, 0xf4,
2104        2, 0xcefe,
2105        4, 0x10401
2106    };
2107
2108    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2109    UErrorCode errorCode=U_ZERO_ERROR;
2110    UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2111    if(U_FAILURE(errorCode)) {
2112        log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2113        return;
2114    }
2115    TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2116    /* Test the condition when source >= sourceLimit */
2117    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2118    /*Test for the condition where there is an invalid character*/
2119    {
2120        static const uint8_t source2[]={0x61};
2121        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2122        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2123    }
2124#if 0
2125    /*
2126     * Test disabled because currently the UTF-16BE/LE converters are supposed
2127     * to not set errors for unpaired surrogates.
2128     * This may change with
2129     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2130     */
2131
2132    /*Test for the condition where there is a surrogate pair*/
2133    {
2134        const uint8_t source2[]={0xd8, 0x01};
2135        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2136    }
2137#endif
2138    ucnv_close(cnv);
2139}
2140
2141static void
2142TestUTF16LE() {
2143    /* test input */
2144    static const uint8_t in[]={
2145        0x61, 0x00,
2146        0x31, 0x00,
2147        0x4e, 0x2e,
2148        0x4e, 0x00,
2149        0x01, 0xd8, 0x01, 0xdc
2150    };
2151
2152    /* expected test results */
2153    static const int32_t results[]={
2154        /* number of bytes read, code point */
2155        2, 0x61,
2156        2, 0x31,
2157        2, 0x2e4e,
2158        2, 0x4e,
2159        4, 0x10401
2160    };
2161
2162    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2163    UErrorCode errorCode=U_ZERO_ERROR;
2164    UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2165    if(U_FAILURE(errorCode)) {
2166        log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2167        return;
2168    }
2169    TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2170    /* Test the condition when source >= sourceLimit */
2171    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2172    /*Test for the condition where there is an invalid character*/
2173    {
2174        static const uint8_t source2[]={0x61};
2175        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2176        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2177    }
2178#if 0
2179    /*
2180     * Test disabled because currently the UTF-16BE/LE converters are supposed
2181     * to not set errors for unpaired surrogates.
2182     * This may change with
2183     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2184     */
2185
2186    /*Test for the condition where there is a surrogate character*/
2187    {
2188        static const uint8_t source2[]={0x01, 0xd8};
2189        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2190    }
2191#endif
2192
2193    ucnv_close(cnv);
2194}
2195
2196static void TestUTF32() {
2197    /* test input */
2198    static const uint8_t in1[]={
2199        0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2200    };
2201    static const uint8_t in2[]={
2202        0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2203    };
2204    static const uint8_t in3[]={
2205        0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2206    };
2207
2208    /* expected test results */
2209    static const int32_t results1[]={
2210        /* number of bytes read, code point */
2211        8, 0x100f00,
2212        4, 0xfeff
2213    };
2214    static const int32_t results2[]={
2215        /* number of bytes read, code point */
2216        8, 0x0f1000,
2217        4, 0xfffe
2218    };
2219    static const int32_t results3[]={
2220        /* number of bytes read, code point */
2221        4, 0xfefe,
2222        4, 0x100f00,
2223        4, 0xfffd, /* unmatched surrogate */
2224        4, 0xfffd  /* unmatched surrogate */
2225    };
2226
2227    const char *source, *limit;
2228
2229    UErrorCode errorCode=U_ZERO_ERROR;
2230    UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2231    if(U_FAILURE(errorCode)) {
2232        log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2233        return;
2234    }
2235
2236    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2237    TestNextUChar(cnv, source, limit, results1, "UTF-32");
2238
2239    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2240    ucnv_resetToUnicode(cnv);
2241    TestNextUChar(cnv, source, limit, results2, "UTF-32");
2242
2243    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2244    ucnv_resetToUnicode(cnv);
2245    TestNextUChar(cnv, source, limit, results3, "UTF-32");
2246
2247    /* Test the condition when source >= sourceLimit */
2248    ucnv_resetToUnicode(cnv);
2249    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2250
2251    ucnv_close(cnv);
2252}
2253
2254static void
2255TestUTF32BE() {
2256    /* test input */
2257    static const uint8_t in[]={
2258        0x00, 0x00, 0x00, 0x61,
2259        0x00, 0x00, 0x30, 0x61,
2260        0x00, 0x00, 0xdc, 0x00,
2261        0x00, 0x00, 0xd8, 0x00,
2262        0x00, 0x00, 0xdf, 0xff,
2263        0x00, 0x00, 0xff, 0xfe,
2264        0x00, 0x10, 0xab, 0xcd,
2265        0x00, 0x10, 0xff, 0xff
2266    };
2267
2268    /* expected test results */
2269    static const int32_t results[]={
2270        /* number of bytes read, code point */
2271        4, 0x61,
2272        4, 0x3061,
2273        4, 0xfffd,
2274        4, 0xfffd,
2275        4, 0xfffd,
2276        4, 0xfffe,
2277        4, 0x10abcd,
2278        4, 0x10ffff
2279    };
2280
2281    /* error test input */
2282    static const uint8_t in2[]={
2283        0x00, 0x00, 0x00, 0x61,
2284        0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2285        0x00, 0x00, 0x00, 0x62,
2286        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2287        0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2288        0x00, 0x00, 0x01, 0x62,
2289        0x00, 0x00, 0x02, 0x62
2290    };
2291
2292    /* expected error test results */
2293    static const int32_t results2[]={
2294        /* number of bytes read, code point */
2295        4,  0x61,
2296        8,  0x62,
2297        12, 0x162,
2298        4,  0x262
2299    };
2300
2301    UConverterToUCallback cb;
2302    const void *p;
2303
2304    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2305    UErrorCode errorCode=U_ZERO_ERROR;
2306    UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2307    if(U_FAILURE(errorCode)) {
2308        log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2309        return;
2310    }
2311    TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2312
2313    /* Test the condition when source >= sourceLimit */
2314    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2315
2316    /* test error behavior with a skip callback */
2317    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2318    source=(const char *)in2;
2319    limit=(const char *)(in2+sizeof(in2));
2320    TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2321
2322    ucnv_close(cnv);
2323}
2324
2325static void
2326TestUTF32LE() {
2327    /* test input */
2328    static const uint8_t in[]={
2329        0x61, 0x00, 0x00, 0x00,
2330        0x61, 0x30, 0x00, 0x00,
2331        0x00, 0xdc, 0x00, 0x00,
2332        0x00, 0xd8, 0x00, 0x00,
2333        0xff, 0xdf, 0x00, 0x00,
2334        0xfe, 0xff, 0x00, 0x00,
2335        0xcd, 0xab, 0x10, 0x00,
2336        0xff, 0xff, 0x10, 0x00
2337    };
2338
2339    /* expected test results */
2340    static const int32_t results[]={
2341        /* number of bytes read, code point */
2342        4, 0x61,
2343        4, 0x3061,
2344        4, 0xfffd,
2345        4, 0xfffd,
2346        4, 0xfffd,
2347        4, 0xfffe,
2348        4, 0x10abcd,
2349        4, 0x10ffff
2350    };
2351
2352    /* error test input */
2353    static const uint8_t in2[]={
2354        0x61, 0x00, 0x00, 0x00,
2355        0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2356        0x62, 0x00, 0x00, 0x00,
2357        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2358        0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2359        0x62, 0x01, 0x00, 0x00,
2360        0x62, 0x02, 0x00, 0x00,
2361    };
2362
2363    /* expected error test results */
2364    static const int32_t results2[]={
2365        /* number of bytes read, code point */
2366        4,  0x61,
2367        8,  0x62,
2368        12, 0x162,
2369        4,  0x262,
2370    };
2371
2372    UConverterToUCallback cb;
2373    const void *p;
2374
2375    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2376    UErrorCode errorCode=U_ZERO_ERROR;
2377    UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2378    if(U_FAILURE(errorCode)) {
2379        log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2380        return;
2381    }
2382    TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2383
2384    /* Test the condition when source >= sourceLimit */
2385    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2386
2387    /* test error behavior with a skip callback */
2388    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2389    source=(const char *)in2;
2390    limit=(const char *)(in2+sizeof(in2));
2391    TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2392
2393    ucnv_close(cnv);
2394}
2395
2396static void
2397TestLATIN1() {
2398    /* test input */
2399    static const uint8_t in[]={
2400       0x61,
2401       0x31,
2402       0x32,
2403       0xc0,
2404       0xf0,
2405       0xf4,
2406    };
2407
2408    /* expected test results */
2409    static const int32_t results[]={
2410        /* number of bytes read, code point */
2411        1, 0x61,
2412        1, 0x31,
2413        1, 0x32,
2414        1, 0xc0,
2415        1, 0xf0,
2416        1, 0xf4,
2417    };
2418    static const uint16_t in1[] = {
2419        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2420        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2421        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2422        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2423        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2424        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2425        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2426        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2427        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2428        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2429        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2430        0xcb, 0x82
2431    };
2432    static const uint8_t out1[] = {
2433        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2434        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2435        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2436        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2437        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2438        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2439        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2440        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2441        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2442        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2443        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2444        0xcb, 0x82
2445    };
2446    static const uint16_t in2[]={
2447        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2448        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2449        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2450        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2451        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2452        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2453        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2454        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2455        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2456        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2457        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2458        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2459        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2460        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2461        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2462        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2463        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2464        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2465        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2466        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2467        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2468        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2469        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2470        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2471        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2472        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2473        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2474        0x37, 0x20, 0x2A, 0x2F,
2475    };
2476    static const unsigned char out2[]={
2477        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2478        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2479        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2480        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2481        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2482        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2483        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2484        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2485        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2486        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2487        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2488        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2489        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2490        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2491        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2492        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2493        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2494        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2495        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2496        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2497        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2498        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2499        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2500        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2501        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2502        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2503        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2504        0x37, 0x20, 0x2A, 0x2F,
2505    };
2506    const char *source=(const char *)in;
2507    const char *limit=(const char *)in+sizeof(in);
2508
2509    UErrorCode errorCode=U_ZERO_ERROR;
2510    UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2511    if(U_FAILURE(errorCode)) {
2512        log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2513        return;
2514    }
2515    TestNextUChar(cnv, source, limit, results, "LATIN_1");
2516    /* Test the condition when source >= sourceLimit */
2517    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2518    TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2519    TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2520
2521    ucnv_close(cnv);
2522}
2523
2524static void
2525TestSBCS() {
2526    /* test input */
2527    static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2528    /* expected test results */
2529    static const int32_t results[]={
2530        /* number of bytes read, code point */
2531        1, 0x61,
2532        1, 0xbf,
2533        1, 0xc4,
2534        1, 0x2021,
2535        1, 0xf8ff,
2536        1, 0x00d9
2537    };
2538
2539    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2540    UErrorCode errorCode=U_ZERO_ERROR;
2541    UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2542    if(U_FAILURE(errorCode)) {
2543        log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2544        return;
2545    }
2546    TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2547    /* Test the condition when source >= sourceLimit */
2548    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2549    /*Test for Illegal character */ /*
2550    {
2551    static const uint8_t input1[]={ 0xA1 };
2552    const char* illegalsource=(const char*)input1;
2553    TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2554    }
2555   */
2556    ucnv_close(cnv);
2557}
2558
2559static void
2560TestDBCS() {
2561    /* test input */
2562    static const uint8_t in[]={
2563        0x44, 0x6a,
2564        0xc4, 0x9c,
2565        0x7a, 0x74,
2566        0x46, 0xab,
2567        0x42, 0x5b,
2568
2569    };
2570
2571    /* expected test results */
2572    static const int32_t results[]={
2573        /* number of bytes read, code point */
2574        2, 0x00a7,
2575        2, 0xe1d2,
2576        2, 0x6962,
2577        2, 0xf842,
2578        2, 0xffe5,
2579    };
2580
2581    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2582    UErrorCode errorCode=U_ZERO_ERROR;
2583
2584    UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2585    if(U_FAILURE(errorCode)) {
2586        log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2587        return;
2588    }
2589    TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2590    /* Test the condition when source >= sourceLimit */
2591    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2592    /*Test for the condition where there is an invalid character*/
2593    {
2594        static const uint8_t source2[]={0x1a, 0x1b};
2595        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2596    }
2597    /*Test for the condition where we have a truncated char*/
2598    {
2599        static const uint8_t source1[]={0xc4};
2600        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2601        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2602    }
2603    ucnv_close(cnv);
2604}
2605
2606static void
2607TestMBCS() {
2608    /* test input */
2609    static const uint8_t in[]={
2610        0x01,
2611        0xa6, 0xa3,
2612        0x00,
2613        0xa6, 0xa1,
2614        0x08,
2615        0xc2, 0x76,
2616        0xc2, 0x78,
2617
2618    };
2619
2620    /* expected test results */
2621    static const int32_t results[]={
2622        /* number of bytes read, code point */
2623        1, 0x0001,
2624        2, 0x250c,
2625        1, 0x0000,
2626        2, 0x2500,
2627        1, 0x0008,
2628        2, 0xd60c,
2629        2, 0xd60e,
2630    };
2631
2632    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2633    UErrorCode errorCode=U_ZERO_ERROR;
2634
2635    UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2636    if(U_FAILURE(errorCode)) {
2637        log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2638        return;
2639    }
2640    TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2641    /* Test the condition when source >= sourceLimit */
2642    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2643    /*Test for the condition where there is an invalid character*/
2644    {
2645        static const uint8_t source2[]={0xa1, 0x80};
2646        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2647    }
2648    /*Test for the condition where we have a truncated char*/
2649    {
2650        static const uint8_t source1[]={0xc4};
2651        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2652        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2653    }
2654    ucnv_close(cnv);
2655
2656}
2657
2658#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2659static void
2660TestICCRunout() {
2661/*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2662
2663    const char *cnvName = "ibm-1363";
2664    UErrorCode status = U_ZERO_ERROR;
2665    const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2666    /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2667    const char *source = sourceData;
2668    const char *sourceLim = sourceData+sizeof(sourceData);
2669    UChar c1, c2, c3;
2670    UConverter *cnv=ucnv_open(cnvName, &status);
2671    if(U_FAILURE(status)) {
2672        log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2673	return;
2674    }
2675
2676#if 0
2677    {
2678    UChar   targetBuf[256];
2679    UChar   *target = targetBuf;
2680    UChar   *targetLim = target+256;
2681    ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2682
2683    log_info("After convert: target@%d, source@%d, status%s\n",
2684	     target-targetBuf, source-sourceData, u_errorName(status));
2685
2686    if(U_FAILURE(status)) {
2687	log_err("Failed to convert: %s\n", u_errorName(status));
2688    } else {
2689
2690    }
2691    }
2692#endif
2693
2694    c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2695    log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2696
2697    c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2698    log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2699
2700    c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2701    log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2702
2703    if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2704	log_verbose("OK\n");
2705    } else {
2706	log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2707    }
2708
2709    ucnv_close(cnv);
2710
2711}
2712#endif
2713
2714#ifdef U_ENABLE_GENERIC_ISO_2022
2715
2716static void
2717TestISO_2022() {
2718    /* test input */
2719    static const uint8_t in[]={
2720        0x1b, 0x25, 0x42,
2721        0x31,
2722        0x32,
2723        0x61,
2724        0xc2, 0x80,
2725        0xe0, 0xa0, 0x80,
2726        0xf0, 0x90, 0x80, 0x80
2727    };
2728
2729
2730
2731    /* expected test results */
2732    static const int32_t results[]={
2733        /* number of bytes read, code point */
2734        4, 0x0031,  /* 4 bytes including the escape sequence */
2735        1, 0x0032,
2736        1, 0x61,
2737        2, 0x80,
2738        3, 0x800,
2739        4, 0x10000
2740    };
2741
2742    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2743    UErrorCode errorCode=U_ZERO_ERROR;
2744    UConverter *cnv;
2745
2746    cnv=ucnv_open("ISO_2022", &errorCode);
2747    if(U_FAILURE(errorCode)) {
2748        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2749        return;
2750    }
2751    TestNextUChar(cnv, source, limit, results, "ISO_2022");
2752
2753    /* Test the condition when source >= sourceLimit */
2754    TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2755    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2756    /*Test for the condition where we have a truncated char*/
2757    {
2758        static const uint8_t source1[]={0xc4};
2759        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2760        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2761    }
2762    /*Test for the condition where there is an invalid character*/
2763    {
2764        static const uint8_t source2[]={0xa1, 0x01};
2765        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2766    }
2767    ucnv_close(cnv);
2768}
2769
2770#endif
2771
2772static void
2773TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2774    const UChar* uSource;
2775    const UChar* uSourceLimit;
2776    const char* cSource;
2777    const char* cSourceLimit;
2778    UChar *uTargetLimit =NULL;
2779    UChar *uTarget;
2780    char *cTarget;
2781    const char *cTargetLimit;
2782    char *cBuf;
2783    UChar *uBuf; /*,*test;*/
2784    int32_t uBufSize = 120;
2785    int len=0;
2786    int i=2;
2787    UErrorCode errorCode=U_ZERO_ERROR;
2788    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2789    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2790    ucnv_reset(cnv);
2791    for(;--i>0; ){
2792        uSource = (UChar*) source;
2793        uSourceLimit=(const UChar*)sourceLimit;
2794        cTarget = cBuf;
2795        uTarget = uBuf;
2796        cSource = cBuf;
2797        cTargetLimit = cBuf;
2798        uTargetLimit = uBuf;
2799
2800        do{
2801
2802            cTargetLimit = cTargetLimit+ i;
2803            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2804            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2805               errorCode=U_ZERO_ERROR;
2806                continue;
2807            }
2808
2809            if(U_FAILURE(errorCode)){
2810                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2811                return;
2812            }
2813
2814        }while (uSource<uSourceLimit);
2815
2816        cSourceLimit =cTarget;
2817        do{
2818            uTargetLimit=uTargetLimit+i;
2819            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2820            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2821               errorCode=U_ZERO_ERROR;
2822                continue;
2823            }
2824            if(U_FAILURE(errorCode)){
2825                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2826                    return;
2827            }
2828        }while(cSource<cSourceLimit);
2829
2830        uSource = source;
2831        /*test =uBuf;*/
2832        for(len=0;len<(int)(source - sourceLimit);len++){
2833            if(uBuf[len]!=uSource[len]){
2834                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2835            }
2836        }
2837    }
2838    free(uBuf);
2839    free(cBuf);
2840}
2841/* Test for Jitterbug 778 */
2842static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2843    const UChar* uSource;
2844    const UChar* uSourceLimit;
2845    const char* cSource;
2846    UChar *uTargetLimit =NULL;
2847    UChar *uTarget;
2848    char *cTarget;
2849    const char *cTargetLimit;
2850    char *cBuf;
2851    UChar *uBuf,*test;
2852    int32_t uBufSize = 120;
2853    int numCharsInTarget=0;
2854    UErrorCode errorCode=U_ZERO_ERROR;
2855    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2856    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2857    uSource = source;
2858    uSourceLimit=sourceLimit;
2859    cTarget = cBuf;
2860    cTargetLimit = cBuf +uBufSize*5;
2861    uTarget = uBuf;
2862    uTargetLimit = uBuf+ uBufSize*5;
2863    ucnv_reset(cnv);
2864    numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2865    if(U_FAILURE(errorCode)){
2866        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2867        return;
2868    }
2869    cSource = cBuf;
2870    test =uBuf;
2871    ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2872    if(U_FAILURE(errorCode)){
2873        log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2874        return;
2875    }
2876    uSource = source;
2877    while(uSource<uSourceLimit){
2878        if(*test!=*uSource){
2879
2880            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2881        }
2882        uSource++;
2883        test++;
2884    }
2885    free(uBuf);
2886    free(cBuf);
2887}
2888
2889static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2890    const UChar* uSource;
2891    const UChar* uSourceLimit;
2892    const char* cSource;
2893    const char* cSourceLimit;
2894    UChar *uTargetLimit =NULL;
2895    UChar *uTarget;
2896    char *cTarget;
2897    const char *cTargetLimit;
2898    char *cBuf;
2899    UChar *uBuf; /*,*test;*/
2900    int32_t uBufSize = 120;
2901    int len=0;
2902    int i=2;
2903    const UChar *temp = sourceLimit;
2904    UErrorCode errorCode=U_ZERO_ERROR;
2905    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2906    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2907
2908    ucnv_reset(cnv);
2909    for(;--i>0;){
2910        uSource = (UChar*) source;
2911        cTarget = cBuf;
2912        uTarget = uBuf;
2913        cSource = cBuf;
2914        cTargetLimit = cBuf;
2915        uTargetLimit = uBuf+uBufSize*5;
2916        cTargetLimit = cTargetLimit+uBufSize*10;
2917        uSourceLimit=uSource;
2918        do{
2919
2920            if (uSourceLimit < sourceLimit) {
2921                uSourceLimit = uSourceLimit+1;
2922            }
2923            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2924            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2925               errorCode=U_ZERO_ERROR;
2926                continue;
2927            }
2928
2929            if(U_FAILURE(errorCode)){
2930                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2931                return;
2932            }
2933
2934        }while (uSource<temp);
2935
2936        cSourceLimit =cBuf;
2937        do{
2938            if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2939                cSourceLimit = cSourceLimit+1;
2940            }
2941            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2942            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2943               errorCode=U_ZERO_ERROR;
2944                continue;
2945            }
2946            if(U_FAILURE(errorCode)){
2947                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2948                    return;
2949            }
2950        }while(cSource<cTarget);
2951
2952        uSource = source;
2953        /*test =uBuf;*/
2954        for(;len<(int)(source - sourceLimit);len++){
2955            if(uBuf[len]!=uSource[len]){
2956                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2957            }
2958        }
2959    }
2960    free(uBuf);
2961    free(cBuf);
2962}
2963static void
2964TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2965                     const uint16_t results[], const char* message){
2966/*     const char* s0; */
2967     const char* s=(char*)source;
2968     const uint16_t *r=results;
2969     UErrorCode errorCode=U_ZERO_ERROR;
2970     uint32_t c,exC;
2971     ucnv_reset(cnv);
2972     while(s<limit) {
2973	 /* s0=s; */
2974        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2975        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2976            break; /* no more significant input */
2977        } else if(U_FAILURE(errorCode)) {
2978            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2979            break;
2980        } else {
2981            if(U16_IS_LEAD(*r)){
2982                int i =0, len = 2;
2983                U16_NEXT(r, i, len, exC);
2984                r++;
2985            }else{
2986                exC = *r;
2987            }
2988            if(c!=(uint32_t)(exC))
2989                log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
2990        }
2991        r++;
2992    }
2993}
2994
2995static int TestJitterbug930(const char* enc){
2996    UErrorCode err = U_ZERO_ERROR;
2997    UConverter*converter;
2998    char out[80];
2999    char*target = out;
3000    UChar in[4];
3001    const UChar*source = in;
3002    int32_t off[80];
3003    int32_t* offsets = off;
3004    int numOffWritten=0;
3005    UBool flush = 0;
3006    converter = my_ucnv_open(enc, &err);
3007
3008    in[0] = 0x41;     /* 0x4E00;*/
3009    in[1] = 0x4E01;
3010    in[2] = 0x4E02;
3011    in[3] = 0x4E03;
3012
3013    memset(off, '*', sizeof(off));
3014
3015    ucnv_fromUnicode (converter,
3016            &target,
3017            target+2,
3018            &source,
3019            source+3,
3020            offsets,
3021            flush,
3022            &err);
3023
3024        /* writes three bytes into the output buffer: 41 1B 24
3025        * but offsets contains 0 1 1
3026    */
3027    while(*offsets< off[10]){
3028        numOffWritten++;
3029        offsets++;
3030    }
3031    log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3032    if(numOffWritten!= (int)(target-out)){
3033        log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3034    }
3035
3036    err = U_ZERO_ERROR;
3037
3038    memset(off,'*' , sizeof(off));
3039
3040    flush = 1;
3041    offsets=off;
3042    ucnv_fromUnicode (converter,
3043            &target,
3044            target+4,
3045            &source,
3046            source,
3047            offsets,
3048            flush,
3049            &err);
3050    numOffWritten=0;
3051    while(*offsets< off[10]){
3052        numOffWritten++;
3053        if(*offsets!= -1){
3054            log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3055        }
3056        offsets++;
3057    }
3058
3059    /* writes 42 43 7A into output buffer,
3060     * offsets contains -1 -1 -1
3061     */
3062    ucnv_close(converter);
3063    return 0;
3064}
3065
3066static void
3067TestHZ() {
3068    /* test input */
3069    static const uint16_t in[]={
3070            0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3071            0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3072            0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3073            0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3074            0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3075            0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3076            0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3077            0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3078            0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3079            0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3080            0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3081            0x005A, 0x005B, 0x005C, 0x000A
3082      };
3083    const UChar* uSource;
3084    const UChar* uSourceLimit;
3085    const char* cSource;
3086    const char* cSourceLimit;
3087    UChar *uTargetLimit =NULL;
3088    UChar *uTarget;
3089    char *cTarget;
3090    const char *cTargetLimit;
3091    char *cBuf;
3092    UChar *uBuf,*test;
3093    int32_t uBufSize = 120;
3094    UErrorCode errorCode=U_ZERO_ERROR;
3095    UConverter *cnv;
3096    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3097    int32_t* myOff= offsets;
3098    cnv=ucnv_open("HZ", &errorCode);
3099    if(U_FAILURE(errorCode)) {
3100        log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3101        return;
3102    }
3103
3104    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3105    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3106    uSource = (const UChar*)in;
3107    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3108    cTarget = cBuf;
3109    cTargetLimit = cBuf +uBufSize*5;
3110    uTarget = uBuf;
3111    uTargetLimit = uBuf+ uBufSize*5;
3112    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3113    if(U_FAILURE(errorCode)){
3114        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3115        return;
3116    }
3117    cSource = cBuf;
3118    cSourceLimit =cTarget;
3119    test =uBuf;
3120    myOff=offsets;
3121    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3122    if(U_FAILURE(errorCode)){
3123        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3124        return;
3125    }
3126    uSource = (const UChar*)in;
3127    while(uSource<uSourceLimit){
3128        if(*test!=*uSource){
3129
3130            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3131        }
3132        uSource++;
3133        test++;
3134    }
3135    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3136    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3137    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3138    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3139    TestJitterbug930("csISO2022JP");
3140    ucnv_close(cnv);
3141    free(offsets);
3142    free(uBuf);
3143    free(cBuf);
3144}
3145
3146static void
3147TestISCII(){
3148        /* test input */
3149    static const uint16_t in[]={
3150        /* test full range of Devanagari */
3151        0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3152        0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3153        0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3154        0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3155        0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3156        0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3157        0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3158        0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3159        0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3160        0x096D,0x096E,0x096F,
3161        /* test Soft halant*/
3162        0x0915,0x094d, 0x200D,
3163        /* test explicit halant */
3164        0x0915,0x094d, 0x200c,
3165        /* test double danda */
3166        0x965,
3167        /* test ASCII */
3168        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3169        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3170        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3171        /* tests from Lotus */
3172        0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3173        0x0930,0x094D,0x200D,
3174        0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3175        0x0915,0x0921,0x002B,0x095F,
3176        /* tamil range */
3177        0x0B86, 0xB87, 0xB88,
3178        /* telugu range */
3179        0x0C05, 0x0C02, 0x0C03,0x0c31,
3180        /* kannada range */
3181        0x0C85, 0xC82, 0x0C83,
3182        /* test Abbr sign and Anudatta */
3183        0x0970, 0x952,
3184       /* 0x0958,
3185        0x0959,
3186        0x095A,
3187        0x095B,
3188        0x095C,
3189        0x095D,
3190        0x095E,
3191        0x095F,*/
3192        0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3193        0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3194        0x090C ,
3195        0x0962,
3196        0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3197        0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3198        0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3199        0x093D /* Avagraha  0xEA, 0xE9*/,
3200        0x0958,
3201        0x0959,
3202        0x095A,
3203        0x095B,
3204        0x095C,
3205        0x095D,
3206        0x095E,
3207        0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3208      };
3209    static const unsigned char byteArr[]={
3210
3211        0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3212        0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3213        0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3214        0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3215        0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3216        0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3217        0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3218        0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3219        0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3220        0xf8,0xf9,0xfa,
3221        /* test soft halant */
3222        0xb3, 0xE8, 0xE9,
3223        /* test explicit halant */
3224        0xb3, 0xE8, 0xE8,
3225        /* test double danda */
3226        0xea, 0xea,
3227        /* test ASCII */
3228        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3229        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3230        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3231        /* test ATR code */
3232
3233        /* tests from Lotus */
3234        0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3235        0xEF,0x42,0xCF,0xE8,0xD9,
3236        0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3237        0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3238        /* tamil range */
3239        0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3240        /* telugu range */
3241        0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3242        /* kannada range */
3243        0xEF, 0x48,0xa4, 0xa2, 0xa3,
3244        /* anudatta and abbreviation sign */
3245        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3246
3247
3248        0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3249
3250        0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3251
3252        0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3253
3254        0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3255
3256        0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3257
3258        0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3259
3260        0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3261
3262        0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3263
3264        0xB3, 0xE9, /* Ka + NUKTA */
3265
3266        0xB4, 0xE9, /* Kha + NUKTA */
3267
3268        0xB5, 0xE9, /* Ga + NUKTA */
3269
3270        0xBA, 0xE9,
3271
3272        0xBF, 0xE9,
3273
3274        0xC0, 0xE9,
3275
3276        0xC9, 0xE9,
3277        /* INV halant RA    */
3278        0xD9, 0xE8, 0xCF,
3279        0x00, 0x00A0,
3280        /* just consume unhandled codepoints */
3281        0xEF, 0x30,
3282
3283    };
3284    testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3285    TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3286
3287}
3288
3289static void
3290TestISO_2022_JP() {
3291    /* test input */
3292    static const uint16_t in[]={
3293        0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3294        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3295        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3296        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3297        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3298        0x201D, 0x3014, 0x000D, 0x000A,
3299        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3300        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3301        };
3302    const UChar* uSource;
3303    const UChar* uSourceLimit;
3304    const char* cSource;
3305    const char* cSourceLimit;
3306    UChar *uTargetLimit =NULL;
3307    UChar *uTarget;
3308    char *cTarget;
3309    const char *cTargetLimit;
3310    char *cBuf;
3311    UChar *uBuf,*test;
3312    int32_t uBufSize = 120;
3313    UErrorCode errorCode=U_ZERO_ERROR;
3314    UConverter *cnv;
3315    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3316    int32_t* myOff= offsets;
3317    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3318    if(U_FAILURE(errorCode)) {
3319        log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3320        return;
3321    }
3322
3323    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3324    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3325    uSource = (const UChar*)in;
3326    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3327    cTarget = cBuf;
3328    cTargetLimit = cBuf +uBufSize*5;
3329    uTarget = uBuf;
3330    uTargetLimit = uBuf+ uBufSize*5;
3331    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3332    if(U_FAILURE(errorCode)){
3333        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3334        return;
3335    }
3336    cSource = cBuf;
3337    cSourceLimit =cTarget;
3338    test =uBuf;
3339    myOff=offsets;
3340    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3341    if(U_FAILURE(errorCode)){
3342        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3343        return;
3344    }
3345
3346    uSource = (const UChar*)in;
3347    while(uSource<uSourceLimit){
3348        if(*test!=*uSource){
3349
3350            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3351        }
3352        uSource++;
3353        test++;
3354    }
3355
3356    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3357    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3358    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3359    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3360    TestJitterbug930("csISO2022JP");
3361    ucnv_close(cnv);
3362    free(uBuf);
3363    free(cBuf);
3364    free(offsets);
3365}
3366
3367static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3368    const UChar* uSource;
3369    const UChar* uSourceLimit;
3370    const char* cSource;
3371    const char* cSourceLimit;
3372    UChar *uTargetLimit =NULL;
3373    UChar *uTarget;
3374    char *cTarget;
3375    const char *cTargetLimit;
3376    char *cBuf;
3377    UChar *uBuf,*test;
3378    int32_t uBufSize = 120*10;
3379    UErrorCode errorCode=U_ZERO_ERROR;
3380    UConverter *cnv;
3381    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3382    int32_t* myOff= offsets;
3383    cnv=my_ucnv_open(conv, &errorCode);
3384    if(U_FAILURE(errorCode)) {
3385        log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3386        return;
3387    }
3388
3389    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3390    cBuf =(char*)malloc(uBufSize * sizeof(char));
3391    uSource = (const UChar*)in;
3392    uSourceLimit=uSource+len;
3393    cTarget = cBuf;
3394    cTargetLimit = cBuf +uBufSize;
3395    uTarget = uBuf;
3396    uTargetLimit = uBuf+ uBufSize;
3397    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3398    if(U_FAILURE(errorCode)){
3399        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3400        return;
3401    }
3402    /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3403    cSource = cBuf;
3404    cSourceLimit =cTarget;
3405    test =uBuf;
3406    myOff=offsets;
3407    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3408    if(U_FAILURE(errorCode)){
3409        log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3410        return;
3411    }
3412
3413    uSource = (const UChar*)in;
3414    while(uSource<uSourceLimit){
3415        if(*test!=*uSource){
3416            log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3417        }
3418        uSource++;
3419        test++;
3420    }
3421    TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3422    TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3423    TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3424    if(byteArr && byteArrLen!=0){
3425        TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3426        TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3427        {
3428            cSource = byteArr;
3429            cSourceLimit = cSource+byteArrLen;
3430            test=uBuf;
3431            myOff = offsets;
3432            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3433            if(U_FAILURE(errorCode)){
3434                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3435                return;
3436            }
3437
3438            uSource = (const UChar*)in;
3439            while(uSource<uSourceLimit){
3440                if(*test!=*uSource){
3441                    log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3442                }
3443                uSource++;
3444                test++;
3445            }
3446        }
3447    }
3448
3449    ucnv_close(cnv);
3450    free(uBuf);
3451    free(cBuf);
3452    free(offsets);
3453}
3454static UChar U_CALLCONV
3455_charAt(int32_t offset, void *context) {
3456    return ((char*)context)[offset];
3457}
3458
3459static int32_t
3460unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3461    int32_t srcIndex=0;
3462    int32_t dstIndex=0;
3463    if(U_FAILURE(*status)){
3464        return 0;
3465    }
3466    if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3467        *status = U_ILLEGAL_ARGUMENT_ERROR;
3468        return 0;
3469    }
3470    if(srcLen==-1){
3471        srcLen = (int32_t)uprv_strlen(src);
3472    }
3473
3474    for (; srcIndex<srcLen; ) {
3475        UChar32 c = src[srcIndex++];
3476        if (c == 0x005C /*'\\'*/) {
3477            c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3478            if (c == (UChar32)0xFFFFFFFF) {
3479                *status=U_INVALID_CHAR_FOUND; /* return empty string */
3480                break; /* invalid escape sequence */
3481            }
3482        }
3483        if(dstIndex < dstLen){
3484            if(c>0xFFFF){
3485               dst[dstIndex++] = U16_LEAD(c);
3486               if(dstIndex<dstLen){
3487                    dst[dstIndex]=U16_TRAIL(c);
3488               }else{
3489                   *status=U_BUFFER_OVERFLOW_ERROR;
3490               }
3491            }else{
3492                dst[dstIndex]=(UChar)c;
3493            }
3494
3495        }else{
3496            *status = U_BUFFER_OVERFLOW_ERROR;
3497        }
3498        dstIndex++; /* for preflighting */
3499    }
3500    return dstIndex;
3501}
3502
/*
 * Exhaustive round-trip test for the converter named `cp`.
 *
 * Every code point from 1 to 0x10FFFF, except the surrogate range
 * 0xD800..0xDFFF, is passed to TestConv() in four shapes: alone, repeated
 * twice, repeated three times, and placed between 0x0061 and 0x5555.
 * Code point 0 is handled separately before the loop.
 */
static void
TestFullRoundtrip(const char* cp){
    UChar usource[10] ={0};   /* the code point under test, repeated up to three times */
    UChar nsrc[10] = {0};     /* 0x0061, the code point under test, 0x5555 */
    uint32_t i=1;
    int len=0, ulen;
    nsrc[0]=0x0061;
    /* Test codepoint 0 */
    TestConv(usource,1,cp,"",NULL,0);
    TestConv(usource,2,cp,"",NULL,0);
    /* nsrc is now { 0x0061, 0x0000, 0x5555 } */
    nsrc[2]=0x5555;
    TestConv(nsrc,3,cp,"",NULL,0);

    for(;i<=0x10FFFF;i++){
        /* Skip the surrogate code points; the loop increment then moves on to 0xE000. */
        if(i==0xD800){
            i=0xDFFF;
            continue;
        }
        /* Encode i as UTF-16: one unit for the BMP, a surrogate pair otherwise. */
        if(i<=0xFFFF){
            usource[0] =(UChar) i;
            len=1;
        }else{
            usource[0]=U16_LEAD(i);
            usource[1]=U16_TRAIL(i);
            len=2;
        }
        ulen=len;
        /* NOTE(review): usource is zeroed at the end of every iteration, so this looks redundant - confirm */
        if(i==0x80) {
            usource[2]=0;
        }
        /* Test only single code points */
        TestConv(usource,ulen,cp,"",NULL,0);
        /* Test codepoint repeated twice */
        /* (both units are copied unconditionally; for len==1 the second copy lies beyond ulen) */
        usource[ulen]=usource[0];
        usource[ulen+1]=usource[1];
        ulen+=len;
        TestConv(usource,ulen,cp,"",NULL,0);
        /* Test codepoint repeated 3 times */
        usource[ulen]=usource[0];
        usource[ulen+1]=usource[1];
        ulen+=len;
        TestConv(usource,ulen,cp,"",NULL,0);
        /* Test codepoint in between 2 codepoints */
        /* for len==1 the write to nsrc[len+1] overwrites the nsrc[2] copy made just before it */
        nsrc[1]=usource[0];
        nsrc[2]=usource[1];
        nsrc[len+1]=0x5555;
        TestConv(nsrc,len+2,cp,"",NULL,0);
        /* start the next iteration from an all-zero buffer */
        uprv_memset(usource,0,sizeof(UChar)*10);
    }
}
3553
3554static void
3555TestRoundTrippingAllUTF(void){
3556    if(!getTestOption(QUICK_OPTION)){
3557        log_verbose("Running exhaustive round trip test for BOCU-1\n");
3558        TestFullRoundtrip("BOCU-1");
3559        log_verbose("Running exhaustive round trip test for SCSU\n");
3560        TestFullRoundtrip("SCSU");
3561        log_verbose("Running exhaustive round trip test for UTF-8\n");
3562        TestFullRoundtrip("UTF-8");
3563        log_verbose("Running exhaustive round trip test for CESU-8\n");
3564        TestFullRoundtrip("CESU-8");
3565        log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3566        TestFullRoundtrip("UTF-16BE");
3567        log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3568        TestFullRoundtrip("UTF-16LE");
3569        log_verbose("Running exhaustive round trip test for UTF-16\n");
3570        TestFullRoundtrip("UTF-16");
3571        log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3572        TestFullRoundtrip("UTF-32BE");
3573        log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3574        TestFullRoundtrip("UTF-32LE");
3575        log_verbose("Running exhaustive round trip test for UTF-32\n");
3576        TestFullRoundtrip("UTF-32");
3577        log_verbose("Running exhaustive round trip test for UTF-7\n");
3578        TestFullRoundtrip("UTF-7");
3579        log_verbose("Running exhaustive round trip test for UTF-7\n");
3580        TestFullRoundtrip("UTF-7,version=1");
3581        log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3582        TestFullRoundtrip("IMAP-mailbox-name");
3583        /*
3584         *
3585         * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3586         * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3587         * The old mappings remain as fallbacks.
3588         * This test may be reintroduced at a later time.
3589         *
3590         * 110118 - mow
3591         */
3592         /*
3593         log_verbose("Running exhaustive round trip test for GB18030\n");
3594         TestFullRoundtrip("GB18030");
3595         */
3596    }
3597}
3598
/*
 * SCSU converter test.
 *
 * First, every escaped string in fTestCases is unescaped to UTF-16 and
 * round-tripped through the "SCSU" converter with TestConv().  Then a set of
 * fixed UTF-16 samples is passed to TestConv() together with pre-encoded SCSU
 * bytes (german, russian, japanese, "all features"), and finally the larger
 * monkeyIn sample is round-tripped without expected bytes.
 */
static void
TestSCSU() {

    static const uint16_t germanUTF16[]={
        0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
    };

    static const uint8_t germanSCSU[]={
        0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
    };

    static const uint16_t russianUTF16[]={
        0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
    };

    static const uint8_t russianSCSU[]={
        0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
    };

    static const uint16_t japaneseUTF16[]={
        0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
        0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
        0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
        0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
        0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
        0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
        0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
        0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
        0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
        0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
        0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
        0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
        0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
        0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
        0x307e, 0x3067, 0x3042, 0x308b, 0x3002
    };

    /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
     it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
    static const uint8_t japaneseSCSU[]={
        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
        0xcb, 0x82
    };

    static const uint16_t allFeaturesUTF16[]={
        0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
        0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
        0x01df, 0xf000, 0xdbff, 0xdfff
    };

    /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
     * result here (34B vs. 35B)
     */
    static const uint8_t allFeaturesSCSU[]={
        0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
        0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
        0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
        0xdf, 0x14, 0x80, 0x15, 0xff
    };
    /* larger mixed sample; only round-tripped, no expected SCSU bytes are supplied for it */
    static const uint16_t monkeyIn[]={
        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
        /* test non-BMP code points */
        0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
        0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
        0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
        0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
        0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
        0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
        0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
        0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,


        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
        0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
    };
    /* escaped strings; each one is expanded with unescape() before being converted */
    static const char *fTestCases [] = {
          "\\ud800\\udc00", /* smallest surrogate*/
          "\\ud8ff\\udcff",
          "\\udBff\\udFff", /* largest surrogate pair*/
          "\\ud834\\udc00",
          "\\U0010FFFF",
          "Hello \\u9292 \\u9192 World!",
          "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",

          "\\u0648\\u06c8", /* catch missing reset*/
          "\\u0648\\u06c8",

          "\\u4444\\uE001", /* lowest quotable*/
          "\\u4444\\uf2FF", /* highest quotable*/
          "\\u4444\\uf188\\u4444",
          "\\u4444\\uf188\\uf288",
          "\\u4444\\uf188abc\\u0429\\uf288",
          "\\u9292\\u2222",
          "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
          "Hello World!123456",
          "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/

          "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
          "abc\\u4411d",      /* uses SQU*/
          "abc\\u4411\\u4412d",/* uses SCU*/
          "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
          "\\u9292\\u2222",
          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
          "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
          "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",

          "", /* empty input*/
          "\\u0000", /* smallest BMP character*/
          "\\uFFFF", /* largest BMP character*/

          /* regression tests*/
          "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
          "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
          "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
          "\\u0041\\u00df\\u0401\\u015f",
          "\\u9066\\u2123abc",
          /* NOTE(review): the third escape in the next string has no hex digits and looks malformed - confirm it is intentional */
          "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
          "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
    };
    int i=0;
    /* Round-trip every escaped test case through the SCSU converter. */
    for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
        const char* cSrc = fTestCases[i];
        UErrorCode status = U_ZERO_ERROR;
        int32_t cSrcLen,srcLen;
        UChar* src;
        /* UConverter* cnv = ucnv_open("SCSU",&status); */
        cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
        /* one UChar per input char, plus one spare, is allocated for the unescaped text */
        src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
        /* NOTE(review): status is not checked afterwards; when unescape() stops at an invalid
         * escape only the code units counted so far are passed on to TestConv() */
        srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
        log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
        TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
        free(src);
    }
    /* Fixed samples with expected SCSU bytes. */
    /* NOTE(review): the next two calls have identical arguments - confirm whether the repetition is intentional */
    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
    TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
    TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
    TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
}
3777
3778#if !UCONFIG_NO_LEGACY_CONVERSION
3779static void TestJitterbug2346(){
3780    char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3781                      0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3782    uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3783
3784    UChar uTarget[500]={'\0'};
3785    UChar* utarget=uTarget;
3786    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3787
3788    char cTarget[500]={'\0'};
3789    char* ctarget=cTarget;
3790    char* ctargetLimit=cTarget+sizeof(cTarget);
3791    const char* csource=source;
3792    UChar* temp = expected;
3793    UErrorCode err=U_ZERO_ERROR;
3794
3795    UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3796    if(U_FAILURE(err)) {
3797        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3798        return;
3799    }
3800    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3801    if(U_FAILURE(err)) {
3802        log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3803        return;
3804    }
3805    utargetLimit=utarget;
3806    utarget = uTarget;
3807    while(utarget<utargetLimit){
3808        if(*temp!=*utarget){
3809
3810            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3811        }
3812        utarget++;
3813        temp++;
3814    }
3815    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3816    if(U_FAILURE(err)) {
3817        log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3818        return;
3819    }
3820    ctargetLimit=ctarget;
3821    ctarget =cTarget;
3822    ucnv_close(conv);
3823
3824
3825}
3826
3827static void
3828TestISO_2022_JP_1() {
3829    /* test input */
3830    static const uint16_t in[]={
3831        0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3832        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3833        0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3834        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3835        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3836        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3837        0x201D, 0x000D, 0x000A,
3838        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3839        0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3840        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3841        0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3842        0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3843        0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3844      };
3845    const UChar* uSource;
3846    const UChar* uSourceLimit;
3847    const char* cSource;
3848    const char* cSourceLimit;
3849    UChar *uTargetLimit =NULL;
3850    UChar *uTarget;
3851    char *cTarget;
3852    const char *cTargetLimit;
3853    char *cBuf;
3854    UChar *uBuf,*test;
3855    int32_t uBufSize = 120;
3856    UErrorCode errorCode=U_ZERO_ERROR;
3857    UConverter *cnv;
3858
3859    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3860    if(U_FAILURE(errorCode)) {
3861        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3862        return;
3863    }
3864
3865    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3866    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3867    uSource = (const UChar*)in;
3868    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3869    cTarget = cBuf;
3870    cTargetLimit = cBuf +uBufSize*5;
3871    uTarget = uBuf;
3872    uTargetLimit = uBuf+ uBufSize*5;
3873    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3874    if(U_FAILURE(errorCode)){
3875        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3876        return;
3877    }
3878    cSource = cBuf;
3879    cSourceLimit =cTarget;
3880    test =uBuf;
3881    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3882    if(U_FAILURE(errorCode)){
3883        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3884        return;
3885    }
3886    uSource = (const UChar*)in;
3887    while(uSource<uSourceLimit){
3888        if(*test!=*uSource){
3889
3890            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3891        }
3892        uSource++;
3893        test++;
3894    }
3895    /*ucnv_close(cnv);
3896    cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3897    /*Test for the condition where there is an invalid character*/
3898    ucnv_reset(cnv);
3899    {
3900        static const uint8_t source2[]={0x0e,0x24,0x053};
3901        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3902    }
3903    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3904    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3905    ucnv_close(cnv);
3906    free(uBuf);
3907    free(cBuf);
3908}
3909
3910static void
3911TestISO_2022_JP_2() {
3912    /* test input */
3913    static const uint16_t in[]={
3914        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3915        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3916        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3917        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3918        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3919        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3920        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3921        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3922        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3923        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3924        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3925        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3926        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3927        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3928        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3929        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3930        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3931        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3932        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3933      };
3934    const UChar* uSource;
3935    const UChar* uSourceLimit;
3936    const char* cSource;
3937    const char* cSourceLimit;
3938    UChar *uTargetLimit =NULL;
3939    UChar *uTarget;
3940    char *cTarget;
3941    const char *cTargetLimit;
3942    char *cBuf;
3943    UChar *uBuf,*test;
3944    int32_t uBufSize = 120;
3945    UErrorCode errorCode=U_ZERO_ERROR;
3946    UConverter *cnv;
3947    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3948    int32_t* myOff= offsets;
3949    cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3950    if(U_FAILURE(errorCode)) {
3951        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3952        return;
3953    }
3954
3955    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3956    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3957    uSource = (const UChar*)in;
3958    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3959    cTarget = cBuf;
3960    cTargetLimit = cBuf +uBufSize*5;
3961    uTarget = uBuf;
3962    uTargetLimit = uBuf+ uBufSize*5;
3963    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3964    if(U_FAILURE(errorCode)){
3965        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3966        return;
3967    }
3968    cSource = cBuf;
3969    cSourceLimit =cTarget;
3970    test =uBuf;
3971    myOff=offsets;
3972    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3973    if(U_FAILURE(errorCode)){
3974        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3975        return;
3976    }
3977    uSource = (const UChar*)in;
3978    while(uSource<uSourceLimit){
3979        if(*test!=*uSource){
3980
3981            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3982        }
3983        uSource++;
3984        test++;
3985    }
3986    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3987    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3988    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3989    /*Test for the condition where there is an invalid character*/
3990    ucnv_reset(cnv);
3991    {
3992        static const uint8_t source2[]={0x0e,0x24,0x053};
3993        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3994    }
3995    ucnv_close(cnv);
3996    free(uBuf);
3997    free(cBuf);
3998    free(offsets);
3999}
4000
4001static void
4002TestISO_2022_KR() {
4003    /* test input */
4004    static const uint16_t in[]={
4005                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4006                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4007                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4008                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4009                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4010                   ,0x53E3,0x53E4,0x000A,0x000D};
4011    const UChar* uSource;
4012    const UChar* uSourceLimit;
4013    const char* cSource;
4014    const char* cSourceLimit;
4015    UChar *uTargetLimit =NULL;
4016    UChar *uTarget;
4017    char *cTarget;
4018    const char *cTargetLimit;
4019    char *cBuf;
4020    UChar *uBuf,*test;
4021    int32_t uBufSize = 120;
4022    UErrorCode errorCode=U_ZERO_ERROR;
4023    UConverter *cnv;
4024    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4025    int32_t* myOff= offsets;
4026    cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4027    if(U_FAILURE(errorCode)) {
4028        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4029        return;
4030    }
4031
4032    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4033    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4034    uSource = (const UChar*)in;
4035    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4036    cTarget = cBuf;
4037    cTargetLimit = cBuf +uBufSize*5;
4038    uTarget = uBuf;
4039    uTargetLimit = uBuf+ uBufSize*5;
4040    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4041    if(U_FAILURE(errorCode)){
4042        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4043        return;
4044    }
4045    cSource = cBuf;
4046    cSourceLimit =cTarget;
4047    test =uBuf;
4048    myOff=offsets;
4049    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4050    if(U_FAILURE(errorCode)){
4051        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4052        return;
4053    }
4054    uSource = (const UChar*)in;
4055    while(uSource<uSourceLimit){
4056        if(*test!=*uSource){
4057            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4058        }
4059        uSource++;
4060        test++;
4061    }
4062    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4063    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4064    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4065    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4066    TestJitterbug930("csISO2022KR");
4067    /*Test for the condition where there is an invalid character*/
4068    ucnv_reset(cnv);
4069    {
4070        static const uint8_t source2[]={0x1b,0x24,0x053};
4071        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4072        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4073    }
4074    ucnv_close(cnv);
4075    free(uBuf);
4076    free(cBuf);
4077    free(offsets);
4078}
4079
4080static void
4081TestISO_2022_KR_1() {
4082    /* test input */
4083    static const uint16_t in[]={
4084                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4085                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4086                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4087                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4088                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4089                   ,0x53E3,0x53E4,0x000A,0x000D};
4090    const UChar* uSource;
4091    const UChar* uSourceLimit;
4092    const char* cSource;
4093    const char* cSourceLimit;
4094    UChar *uTargetLimit =NULL;
4095    UChar *uTarget;
4096    char *cTarget;
4097    const char *cTargetLimit;
4098    char *cBuf;
4099    UChar *uBuf,*test;
4100    int32_t uBufSize = 120;
4101    UErrorCode errorCode=U_ZERO_ERROR;
4102    UConverter *cnv;
4103    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4104    int32_t* myOff= offsets;
4105    cnv=ucnv_open("ibm-25546", &errorCode);
4106    if(U_FAILURE(errorCode)) {
4107        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4108        return;
4109    }
4110
4111    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4112    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4113    uSource = (const UChar*)in;
4114    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4115    cTarget = cBuf;
4116    cTargetLimit = cBuf +uBufSize*5;
4117    uTarget = uBuf;
4118    uTargetLimit = uBuf+ uBufSize*5;
4119    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4120    if(U_FAILURE(errorCode)){
4121        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4122        return;
4123    }
4124    cSource = cBuf;
4125    cSourceLimit =cTarget;
4126    test =uBuf;
4127    myOff=offsets;
4128    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4129    if(U_FAILURE(errorCode)){
4130        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4131        return;
4132    }
4133    uSource = (const UChar*)in;
4134    while(uSource<uSourceLimit){
4135        if(*test!=*uSource){
4136            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4137        }
4138        uSource++;
4139        test++;
4140    }
4141    ucnv_reset(cnv);
4142    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4143    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4144    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4145    ucnv_reset(cnv);
4146    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4147        /*Test for the condition where there is an invalid character*/
4148    ucnv_reset(cnv);
4149    {
4150        static const uint8_t source2[]={0x1b,0x24,0x053};
4151        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4152        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4153    }
4154    ucnv_close(cnv);
4155    free(uBuf);
4156    free(cBuf);
4157    free(offsets);
4158}
4159
4160static void TestJitterbug2411(){
4161    static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4162                         "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4163    UConverter* kr=NULL, *kr1=NULL;
4164    UErrorCode errorCode = U_ZERO_ERROR;
4165    UChar tgt[100]={'\0'};
4166    UChar* target = tgt;
4167    UChar* targetLimit = target+100;
4168    kr=ucnv_open("iso-2022-kr", &errorCode);
4169    if(U_FAILURE(errorCode)) {
4170        log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4171        return;
4172    }
4173    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4174    if(U_FAILURE(errorCode)) {
4175        log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4176        return;
4177    }
4178    kr1 = ucnv_open("ibm-25546", &errorCode);
4179    if(U_FAILURE(errorCode)) {
4180        log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4181        return;
4182    }
4183    target = tgt;
4184    targetLimit = target+100;
4185    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4186
4187    if(U_FAILURE(errorCode)) {
4188        log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4189        return;
4190    }
4191
4192    ucnv_close(kr);
4193    ucnv_close(kr1);
4194
4195}
4196
4197static void
4198TestJIS(){
4199    /* From Unicode moved to testdata/conversion.txt */
4200    /*To Unicode*/
4201    {
4202        static const uint8_t sampleTextJIS[] = {
4203            0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4204            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4205            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4206        };
4207        static const uint16_t expectedISO2022JIS[] = {
4208            0x0041, 0x0042,
4209            0xFF81, 0xFF82,
4210            0x3000
4211        };
4212        static const int32_t  toISO2022JISOffs[]={
4213            3,4,
4214            8,9,
4215            16
4216        };
4217
4218        static const uint8_t sampleTextJIS7[] = {
4219            0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4220            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4221            0x1b,0x24,0x42,0x21,0x21,
4222            0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4223            0x21,0x22,
4224            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4225        };
4226        static const uint16_t expectedISO2022JIS7[] = {
4227            0x0041, 0x0042,
4228            0xFF81, 0xFF82,
4229            0x3000,
4230            0xFF81, 0xFF82,
4231            0x3001,
4232            0x3000
4233        };
4234        static const int32_t  toISO2022JIS7Offs[]={
4235            3,4,
4236            8,9,
4237            13,16,
4238            17,
4239            19,27
4240        };
4241        static const uint8_t sampleTextJIS8[] = {
4242            0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4243            0xa1,0xc8,0xd9,/*Katakana Set*/
4244            0x1b,0x28,0x42,
4245            0x41,0x42,
4246            0xb1,0xc3, /*Katakana Set*/
4247            0x1b,0x24,0x42,0x21,0x21
4248        };
4249        static const uint16_t expectedISO2022JIS8[] = {
4250            0x0041, 0x0042,
4251            0xff61, 0xff88, 0xff99,
4252            0x0041, 0x0042,
4253            0xff71, 0xff83,
4254            0x3000
4255        };
4256        static const int32_t  toISO2022JIS8Offs[]={
4257            3, 4,  5,  6,
4258            7, 11, 12, 13,
4259            14, 18,
4260        };
4261
4262        testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4263            sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4264        testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4265            sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4266        testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4267            sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4268    }
4269
4270}
4271
4272
4273#if 0
4274 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4275
4276static void TestJitterbug915(){
4277/* tests for roundtripping of the below sequence
4278\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4279\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4280\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4281\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4282\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4283\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4284\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4285*/
4286    static const char cSource[]={
4287        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4288        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4289        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4290        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4291        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4292        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4293        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4294        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4295        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4296        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4297        0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4298        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4299        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4300        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4301        0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4302        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4303        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4304        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4305        0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4306        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4307        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4308        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4309        0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4310        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4311        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4312        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4313        0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4314        0x37, 0x20, 0x2A, 0x2F
4315    };
4316    UChar uTarget[500]={'\0'};
4317    UChar* utarget=uTarget;
4318    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4319
4320    char cTarget[500]={'\0'};
4321    char* ctarget=cTarget;
4322    char* ctargetLimit=cTarget+sizeof(cTarget);
4323    const char* csource=cSource;
4324    const char* tempSrc = cSource;
4325    UErrorCode err=U_ZERO_ERROR;
4326
4327    UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4328    if(U_FAILURE(err)) {
4329        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4330        return;
4331    }
4332    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4333    if(U_FAILURE(err)) {
4334        log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4335        return;
4336    }
4337    utargetLimit=utarget;
4338    utarget = uTarget;
4339    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4340    if(U_FAILURE(err)) {
4341        log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4342        return;
4343    }
4344    ctargetLimit=ctarget;
4345    ctarget =cTarget;
4346    while(ctarget<ctargetLimit){
4347        if(*ctarget != *tempSrc){
4348            log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4349        }
4350        ++ctarget;
4351        ++tempSrc;
4352    }
4353
4354    ucnv_close(conv);
4355}
4356
4357static void
4358TestISO_2022_CN_EXT() {
4359    /* test input */
4360    static const uint16_t in[]={
4361                /* test Non-BMP code points */
4362         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4363         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4364         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4365         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4366         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4367         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4368         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4369         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4370         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4371         0xD869, 0xDED5,
4372
4373         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4374         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4375         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4376         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4377         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4378         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4379         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4380         0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4381         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4382         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4383         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4384         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4385         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4386         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4387         0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4388         0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4389         0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4390         0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4391
4392         0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4393
4394      };
4395
4396    const UChar* uSource;
4397    const UChar* uSourceLimit;
4398    const char* cSource;
4399    const char* cSourceLimit;
4400    UChar *uTargetLimit =NULL;
4401    UChar *uTarget;
4402    char *cTarget;
4403    const char *cTargetLimit;
4404    char *cBuf;
4405    UChar *uBuf,*test;
4406    int32_t uBufSize = 180;
4407    UErrorCode errorCode=U_ZERO_ERROR;
4408    UConverter *cnv;
4409    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4410    int32_t* myOff= offsets;
4411    cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4412    if(U_FAILURE(errorCode)) {
4413        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4414        return;
4415    }
4416
4417    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4418    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4419    uSource = (const UChar*)in;
4420    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4421    cTarget = cBuf;
4422    cTargetLimit = cBuf +uBufSize*5;
4423    uTarget = uBuf;
4424    uTargetLimit = uBuf+ uBufSize*5;
4425    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4426    if(U_FAILURE(errorCode)){
4427        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4428        return;
4429    }
4430    cSource = cBuf;
4431    cSourceLimit =cTarget;
4432    test =uBuf;
4433    myOff=offsets;
4434    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4435    if(U_FAILURE(errorCode)){
4436        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4437        return;
4438    }
4439    uSource = (const UChar*)in;
4440    while(uSource<uSourceLimit){
4441        if(*test!=*uSource){
4442            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4443        }
4444        else{
4445            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4446        }
4447        uSource++;
4448        test++;
4449    }
4450    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4451    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4452    /*Test for the condition where there is an invalid character*/
4453    ucnv_reset(cnv);
4454    {
4455        static const uint8_t source2[]={0x0e,0x24,0x053};
4456        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4457    }
4458    ucnv_close(cnv);
4459    free(uBuf);
4460    free(cBuf);
4461    free(offsets);
4462}
4463#endif
4464
4465static void
4466TestISO_2022_CN() {
4467    /* test input */
4468    static const uint16_t in[]={
4469         /* jitterbug 951 */
4470         0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4471         0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4472         0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4473         0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4474         0x0020, 0x0045, 0x004e, 0x0044,
4475         /**/
4476         0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4477         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4478         0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4479         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4480         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4481         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4482         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4483         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4484         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4485         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4486         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4487         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4488         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4489         0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4490         0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4491         0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4492         0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4493
4494      };
4495    const UChar* uSource;
4496    const UChar* uSourceLimit;
4497    const char* cSource;
4498    const char* cSourceLimit;
4499    UChar *uTargetLimit =NULL;
4500    UChar *uTarget;
4501    char *cTarget;
4502    const char *cTargetLimit;
4503    char *cBuf;
4504    UChar *uBuf,*test;
4505    int32_t uBufSize = 180;
4506    UErrorCode errorCode=U_ZERO_ERROR;
4507    UConverter *cnv;
4508    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4509    int32_t* myOff= offsets;
4510    cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4511    if(U_FAILURE(errorCode)) {
4512        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4513        return;
4514    }
4515
4516    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4517    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4518    uSource = (const UChar*)in;
4519    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4520    cTarget = cBuf;
4521    cTargetLimit = cBuf +uBufSize*5;
4522    uTarget = uBuf;
4523    uTargetLimit = uBuf+ uBufSize*5;
4524    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4525    if(U_FAILURE(errorCode)){
4526        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4527        return;
4528    }
4529    cSource = cBuf;
4530    cSourceLimit =cTarget;
4531    test =uBuf;
4532    myOff=offsets;
4533    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4534    if(U_FAILURE(errorCode)){
4535        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4536        return;
4537    }
4538    uSource = (const UChar*)in;
4539    while(uSource<uSourceLimit){
4540        if(*test!=*uSource){
4541            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4542        }
4543        else{
4544            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4545        }
4546        uSource++;
4547        test++;
4548    }
4549    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4550    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4551    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4552    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4553    TestJitterbug930("csISO2022CN");
4554    /*Test for the condition where there is an invalid character*/
4555    ucnv_reset(cnv);
4556    {
4557        static const uint8_t source2[]={0x0e,0x24,0x053};
4558        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4559    }
4560
4561    ucnv_close(cnv);
4562    free(uBuf);
4563    free(cBuf);
4564    free(offsets);
4565}
4566
/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
/*
 * One row of the empty-segment test table used by TestJitterbug6175().
 * A row whose converterName is NULL terminates the table.
 */
typedef struct {
    const char *    converterName;    /* name handed to ucnv_open(); NULL marks the end of the table */
    const char *    inputText;        /* raw bytes to convert to Unicode (not NUL-terminated) */
    int             inputTextLength;  /* number of bytes in inputText */
} EmptySegmentTest;
4573
4574/* Callback for TestJitterbug6175, should only get called for empty segment errors */
4575static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4576                                             int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4577    if (reason > UCNV_IRREGULAR) {
4578        return;
4579    }
4580    if (reason != UCNV_IRREGULAR) {
4581        log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4582    }
4583    /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4584    *err = U_ZERO_ERROR;
4585    ucnv_cbToUWriteSub(toArgs,0,err);
4586}
4587
4588enum { kEmptySegmentToUCharsMax = 64 };
4589static void TestJitterbug6175(void) {
4590    static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4591    static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4592    static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4593    static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4594    static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4595    static const EmptySegmentTest emptySegmentTests[] = {
4596        /* converterName inputText    inputTextLength */
4597        { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4598        { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4599        { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4600        { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4601        { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4602        /* terminator: */
4603        { NULL,          NULL,        0,                  }
4604    };
4605    const EmptySegmentTest * testPtr;
4606    for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4607        UErrorCode   err = U_ZERO_ERROR;
4608        UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4609        if (U_FAILURE(err)) {
4610            log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4611            return;
4612        }
4613        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4614        if (U_FAILURE(err)) {
4615            log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4616            ucnv_close(cnv);
4617            return;
4618        }
4619        {
4620            UChar         toUChars[kEmptySegmentToUCharsMax];
4621            UChar *       toUCharsPtr = toUChars;
4622            const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4623            const char *  inCharsPtr = testPtr->inputText;
4624            const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4625            ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4626        }
4627        ucnv_close(cnv);
4628    }
4629}
4630
4631static void
4632TestEBCDIC_STATEFUL() {
4633    /* test input */
4634    static const uint8_t in[]={
4635        0x61,
4636        0x1a,
4637        0x0f, 0x4b,
4638        0x42,
4639        0x40,
4640        0x36,
4641    };
4642
4643    /* expected test results */
4644    static const int32_t results[]={
4645        /* number of bytes read, code point */
4646        1, 0x002f,
4647        1, 0x0092,
4648        2, 0x002e,
4649        1, 0xff62,
4650        1, 0x0020,
4651        1, 0x0096,
4652
4653    };
4654    static const uint8_t in2[]={
4655        0x0f,
4656        0xa1,
4657        0x01
4658    };
4659
4660    /* expected test results */
4661    static const int32_t results2[]={
4662        /* number of bytes read, code point */
4663        2, 0x203E,
4664        1, 0x0001,
4665    };
4666
4667    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4668    UErrorCode errorCode=U_ZERO_ERROR;
4669    UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4670    if(U_FAILURE(errorCode)) {
4671        log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4672        return;
4673    }
4674    TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4675    ucnv_reset(cnv);
4676     /* Test the condition when source >= sourceLimit */
4677    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4678    ucnv_reset(cnv);
4679    /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4680    {
4681        static const uint8_t source1[]={0x0f};
4682        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4683    }
4684    /*Test for the condition where there is an invalid character*/
4685    ucnv_reset(cnv);
4686    {
4687        static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4688        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4689    }
4690    ucnv_reset(cnv);
4691    source=(const char*)in2;
4692    limit=(const char*)in2+sizeof(in2);
4693    TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4694    ucnv_close(cnv);
4695
4696}
4697
4698static void
4699TestGB18030() {
4700    /* test input */
4701    static const uint8_t in[]={
4702        0x24,
4703        0x7f,
4704        0x81, 0x30, 0x81, 0x30,
4705        0xa8, 0xbf,
4706        0xa2, 0xe3,
4707        0xd2, 0xbb,
4708        0x82, 0x35, 0x8f, 0x33,
4709        0x84, 0x31, 0xa4, 0x39,
4710        0x90, 0x30, 0x81, 0x30,
4711        0xe3, 0x32, 0x9a, 0x35
4712#if 0
4713        /*
4714         * Feature removed   markus 2000-oct-26
4715         * Only some codepages must match surrogate pairs into supplementary code points -
4716         * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4717         * GB 18030 provides direct encodings for supplementary code points, therefore
4718         * it must not combine two single-encoded surrogates into one code point.
4719         */
4720        0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4721#endif
4722    };
4723
4724    /* expected test results */
4725    static const int32_t results[]={
4726        /* number of bytes read, code point */
4727        1, 0x24,
4728        1, 0x7f,
4729        4, 0x80,
4730        2, 0x1f9,
4731        2, 0x20ac,
4732        2, 0x4e00,
4733        4, 0x9fa6,
4734        4, 0xffff,
4735        4, 0x10000,
4736        4, 0x10ffff
4737#if 0
4738        /* Feature removed. See comment above. */
4739        8, 0x10000
4740#endif
4741    };
4742
4743/*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4744    UErrorCode errorCode=U_ZERO_ERROR;
4745    UConverter *cnv=ucnv_open("gb18030", &errorCode);
4746    if(U_FAILURE(errorCode)) {
4747        log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4748        return;
4749    }
4750    TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4751    ucnv_close(cnv);
4752}
4753
4754static void
4755TestLMBCS() {
4756    /* LMBCS-1 string */
4757    static const uint8_t pszLMBCS[]={
4758        0x61,
4759        0x01, 0x29,
4760        0x81,
4761        0xA0,
4762        0x0F, 0x27,
4763        0x0F, 0x91,
4764        0x14, 0x0a, 0x74,
4765        0x14, 0xF6, 0x02,
4766        0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4767        0x10, 0x88, 0xA0,
4768    };
4769
4770    /* Unicode UChar32 equivalents */
4771    static const UChar32 pszUnicode32[]={
4772        /* code point */
4773        0x00000061,
4774        0x00002013,
4775        0x000000FC,
4776        0x000000E1,
4777        0x00000007,
4778        0x00000091,
4779        0x00000a74,
4780        0x00000200,
4781        0x00023456, /* code point for surrogate pair */
4782        0x00005516
4783    };
4784
4785/* Unicode UChar equivalents */
4786    static const UChar pszUnicode[]={
4787        /* code point */
4788        0x0061,
4789        0x2013,
4790        0x00FC,
4791        0x00E1,
4792        0x0007,
4793        0x0091,
4794        0x0a74,
4795        0x0200,
4796        0xD84D, /* low surrogate */
4797        0xDC56, /* high surrogate */
4798        0x5516
4799    };
4800
4801/* expected test results */
4802    static const int offsets32[]={
4803        /* number of bytes read, code point */
4804        0,
4805        1,
4806        3,
4807        4,
4808        5,
4809        7,
4810        9,
4811        12,
4812        15,
4813        21,
4814        24
4815    };
4816
4817/* expected test results */
4818    static const int offsets[]={
4819        /* number of bytes read, code point */
4820        0,
4821        1,
4822        3,
4823        4,
4824        5,
4825        7,
4826        9,
4827        12,
4828        15,
4829        18,
4830        21,
4831        24
4832    };
4833
4834
4835    UConverter *cnv;
4836
4837#define NAME_LMBCS_1 "LMBCS-1"
4838#define NAME_LMBCS_2 "LMBCS-2"
4839
4840
4841   /* Some basic open/close/property tests on some LMBCS converters */
4842    {
4843
4844      char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4845      char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4846      char get_subchars [1];
4847      const char * get_name;
4848      UConverter *cnv1;
4849      UConverter *cnv2;
4850
4851      int8_t len = sizeof(get_subchars);
4852
4853      UErrorCode errorCode=U_ZERO_ERROR;
4854
4855      /* Open */
4856      cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4857      if(U_FAILURE(errorCode)) {
4858         log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4859         return;
4860      }
4861      cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4862      if(U_FAILURE(errorCode)) {
4863         log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4864         return;
4865      }
4866
4867      /* Name */
4868      get_name = ucnv_getName (cnv1, &errorCode);
4869      if (strcmp(NAME_LMBCS_1,get_name)){
4870         log_err("Unexpected converter name: %s\n", get_name);
4871      }
4872      get_name = ucnv_getName (cnv2, &errorCode);
4873      if (strcmp(NAME_LMBCS_2,get_name)){
4874         log_err("Unexpected converter name: %s\n", get_name);
4875      }
4876
4877      /* substitution chars */
4878      ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4879      if(U_FAILURE(errorCode)) {
4880         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4881      }
4882      if (len!=1){
4883         log_err("Unexpected length of sub chars\n");
4884      }
4885      if (get_subchars[0] != expected_subchars[0]){
4886           log_err("Unexpected value of sub chars\n");
4887      }
4888      ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4889      if(U_FAILURE(errorCode)) {
4890         log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4891      }
4892      ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4893      if(U_FAILURE(errorCode)) {
4894         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4895      }
4896      if (len!=1){
4897         log_err("Unexpected length of sub chars\n");
4898      }
4899      if (get_subchars[0] != new_subchars[0]){
4900           log_err("Unexpected value of sub chars\n");
4901      }
4902      ucnv_close(cnv1);
4903      ucnv_close(cnv2);
4904
4905    }
4906
4907    /* LMBCS to Unicode - offsets */
4908    {
4909       UErrorCode errorCode=U_ZERO_ERROR;
4910
4911       const char * pSource = (const char *)pszLMBCS;
4912       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4913
4914       UChar Out [sizeof(pszUnicode) + 1];
4915       UChar * pOut = Out;
4916       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4917
4918       int32_t off [sizeof(offsets)];
4919
4920      /* last 'offset' in expected results is just the final size.
4921         (Makes other tests easier). Compensate here: */
4922
4923       off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4924
4925
4926
4927      cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4928      if(U_FAILURE(errorCode)) {
4929           log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4930           return;
4931      }
4932
4933
4934
4935      ucnv_toUnicode (cnv,
4936                      &pOut,
4937                      OutLimit,
4938                      &pSource,
4939                      sourceLimit,
4940                      off,
4941                      TRUE,
4942                      &errorCode);
4943
4944
4945       if (memcmp(off,offsets,sizeof(offsets)))
4946       {
4947         log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4948       }
4949       if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4950       {
4951         log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4952       }
4953       ucnv_close(cnv);
4954    }
4955    {
4956   /* LMBCS to Unicode - getNextUChar */
4957      const char * sourceStart;
4958      const char *source=(const char *)pszLMBCS;
4959      const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4960      const UChar32 *results= pszUnicode32;
4961      const int *off = offsets32;
4962
4963      UErrorCode errorCode=U_ZERO_ERROR;
4964      UChar32 uniChar;
4965
4966      cnv=ucnv_open("LMBCS-1", &errorCode);
4967      if(U_FAILURE(errorCode)) {
4968           log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4969           return;
4970      }
4971      else
4972      {
4973
4974         while(source<limit) {
4975            sourceStart=source;
4976            uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4977            if(U_FAILURE(errorCode)) {
4978                  log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4979                  break;
4980            } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4981               log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4982                   uniChar, (source-sourceStart), *results, *off);
4983               break;
4984            }
4985            results++;
4986            off++;
4987         }
4988       }
4989       ucnv_close(cnv);
4990    }
4991    { /* test locale & optimization group operations: Unicode to LMBCS */
4992
4993      UErrorCode errorCode=U_ZERO_ERROR;
4994      UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4995      UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4996      UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4997      UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4998      const UChar * pUniOut = uniString;
4999      UChar * pUniIn = uniString;
5000      uint8_t lmbcsString [4];
5001      const char * pLMBCSOut = (const char *)lmbcsString;
5002      char * pLMBCSIn = (char *)lmbcsString;
5003
5004      /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5005      ucnv_fromUnicode (cnv16he,
5006                        &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
5007                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
5008                        NULL, 1, &errorCode);
5009
5010      if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5011      {
5012         log_err("LMBCS-16,locale=he gives unexpected translation\n");
5013      }
5014
5015      pLMBCSIn= (char *)lmbcsString;
5016      pUniOut = uniString;
5017      ucnv_fromUnicode (cnv01us,
5018                        &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
5019                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
5020                        NULL, 1, &errorCode);
5021
5022      if (lmbcsString[0] != 0x9F)
5023      {
5024         log_err("LMBCS-1,locale=US gives unexpected translation\n");
5025      }
5026
5027      /* single byte char from mbcs char set */
5028      lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5029      pLMBCSOut = (const char *)lmbcsString;
5030      pUniIn = uniString;
5031      ucnv_toUnicode (cnv16jp,
5032                        &pUniIn, pUniIn + 1,
5033                        &pLMBCSOut, (pLMBCSOut + 1),
5034                        NULL, 1, &errorCode);
5035      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5036      {
5037           log_err("Unexpected results from LMBCS-16 single byte char\n");
5038      }
5039      /* convert to group 1: should be 3 bytes */
5040      pLMBCSIn = (char *)lmbcsString;
5041      pUniOut = uniString;
5042      ucnv_fromUnicode (cnv01us,
5043                        &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5044                        &pUniOut, pUniOut + 1,
5045                        NULL, 1, &errorCode);
5046      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5047         || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5048      {
5049           log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5050      }
5051      pLMBCSOut = (const char *)lmbcsString;
5052      pUniIn = uniString;
5053      ucnv_toUnicode (cnv01us,
5054                        &pUniIn, pUniIn + 1,
5055                        &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5056                        NULL, 1, &errorCode);
5057      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5058      {
5059           log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5060      }
5061      pLMBCSIn = (char *)lmbcsString;
5062      pUniOut = uniString;
5063      ucnv_fromUnicode (cnv16jp,
5064                        &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5065                        &pUniOut, pUniOut + 1,
5066                        NULL, 1, &errorCode);
5067      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5068      {
5069           log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5070      }
5071      ucnv_close(cnv16he);
5072      ucnv_close(cnv16jp);
5073      ucnv_close(cnv01us);
5074    }
5075    {
5076       /* Small source buffer testing, LMBCS -> Unicode */
5077
5078       UErrorCode errorCode=U_ZERO_ERROR;
5079
5080       const char * pSource = (const char *)pszLMBCS;
5081       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5082       int codepointCount = 0;
5083
5084       UChar Out [sizeof(pszUnicode) + 1];
5085       UChar * pOut = Out;
5086       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
5087
5088
5089       cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5090       if(U_FAILURE(errorCode)) {
5091           log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5092           return;
5093       }
5094
5095
5096       while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5097       {
5098           ucnv_toUnicode (cnv,
5099               &pOut,
5100               OutLimit,
5101               &pSource,
5102               (pSource+1), /* claim that this is a 1- byte buffer */
5103               NULL,
5104               FALSE,    /* FALSE means there might be more chars in the next buffer */
5105               &errorCode);
5106
5107           if (U_SUCCESS (errorCode))
5108           {
5109               if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5110               {
5111                   /* we are on to the next code point: check value */
5112
5113                   if (Out[0] != pszUnicode[codepointCount]){
5114                       log_err("LMBCS->Uni result %lx should have been %lx \n",
5115                           Out[0], pszUnicode[codepointCount]);
5116                   }
5117
5118                   pOut = Out; /* reset for accumulating next code point */
5119                   codepointCount++;
5120               }
5121           }
5122           else
5123           {
5124               log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5125           }
5126       }
5127       {
5128         /* limits & surrogate error testing */
5129         char LIn [sizeof(pszLMBCS)];
5130         const char * pLIn = LIn;
5131
5132         char LOut [sizeof(pszLMBCS)];
5133         char * pLOut = LOut;
5134
5135         UChar UOut [sizeof(pszUnicode)];
5136         UChar * pUOut = UOut;
5137
5138         UChar UIn [sizeof(pszUnicode)];
5139         const UChar * pUIn = UIn;
5140
5141         int32_t off [sizeof(offsets)];
5142         UChar32 uniChar;
5143
5144         errorCode=U_ZERO_ERROR;
5145
5146         /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5147         pUIn++;
5148         ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5149         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5150         {
5151            log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5152         }
5153         pUIn--;
5154
5155         errorCode=U_ZERO_ERROR;
5156         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5157         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5158         {
5159            log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5160         }
5161         errorCode=U_ZERO_ERROR;
5162
5163         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5164         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5165         {
5166            log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5167         }
5168         errorCode=U_ZERO_ERROR;
5169
5170         /* 0 byte source request - no error, no pointer movement */
5171         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5172         ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5173         if(U_FAILURE(errorCode)) {
5174            log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5175         }
5176         if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5177         {
5178              log_err("Unexpected pointer move in 0 byte source request \n");
5179         }
5180         /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5181         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5182         if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5183         {
5184            log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5185         }
5186         if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5187         {
5188            log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5189         }
5190         errorCode = U_ZERO_ERROR;
5191
5192         /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5193
5194         pUIn = pszUnicode;
5195         ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
5196         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5197         {
5198            log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5199         }
5200
5201         errorCode = U_ZERO_ERROR;
5202
5203         pLIn = (const char *)pszLMBCS;
5204         ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5205         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5206         {
5207            log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5208         }
5209
5210         /* unpaired or chopped LMBCS surrogates */
5211
5212         /* OK high surrogate, Low surrogate is chopped */
5213         LIn [0] = (char)0x14;
5214         LIn [1] = (char)0xD8;
5215         LIn [2] = (char)0x01;
5216         LIn [3] = (char)0x14;
5217         LIn [4] = (char)0xDC;
5218         pLIn = LIn;
5219         errorCode = U_ZERO_ERROR;
5220         pUOut = UOut;
5221
5222         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5223         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5224         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5225         {
5226            log_err("Unexpected results on chopped low surrogate\n");
5227         }
5228
5229         /* chopped at surrogate boundary */
5230         LIn [0] = (char)0x14;
5231         LIn [1] = (char)0xD8;
5232         LIn [2] = (char)0x01;
5233         pLIn = LIn;
5234         errorCode = U_ZERO_ERROR;
5235         pUOut = UOut;
5236
5237         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5238         if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5239         {
5240            log_err("Unexpected results on chopped at surrogate boundary \n");
5241         }
5242
5243         /* unpaired surrogate plus valid Unichar */
5244         LIn [0] = (char)0x14;
5245         LIn [1] = (char)0xD8;
5246         LIn [2] = (char)0x01;
5247         LIn [3] = (char)0x14;
5248         LIn [4] = (char)0xC9;
5249         LIn [5] = (char)0xD0;
5250         pLIn = LIn;
5251         errorCode = U_ZERO_ERROR;
5252         pUOut = UOut;
5253
5254         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5255         if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5256         {
5257            log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5258         }
5259
5260      /* unpaired surrogate plus chopped Unichar */
5261         LIn [0] = (char)0x14;
5262         LIn [1] = (char)0xD8;
5263         LIn [2] = (char)0x01;
5264         LIn [3] = (char)0x14;
5265         LIn [4] = (char)0xC9;
5266
5267         pLIn = LIn;
5268         errorCode = U_ZERO_ERROR;
5269         pUOut = UOut;
5270
5271         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5272         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5273         {
5274            log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5275         }
5276
5277         /* unpaired surrogate plus valid non-Unichar */
5278         LIn [0] = (char)0x14;
5279         LIn [1] = (char)0xD8;
5280         LIn [2] = (char)0x01;
5281         LIn [3] = (char)0x0F;
5282         LIn [4] = (char)0x3B;
5283
5284         pLIn = LIn;
5285         errorCode = U_ZERO_ERROR;
5286         pUOut = UOut;
5287
5288         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5289         if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5290         {
5291            log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5292         }
5293
5294         /* unpaired surrogate plus chopped non-Unichar */
5295         LIn [0] = (char)0x14;
5296         LIn [1] = (char)0xD8;
5297         LIn [2] = (char)0x01;
5298         LIn [3] = (char)0x0F;
5299
5300         pLIn = LIn;
5301         errorCode = U_ZERO_ERROR;
5302         pUOut = UOut;
5303
5304         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5305
5306         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5307         {
5308            log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5309         }
5310       }
5311    }
5312   ucnv_close(cnv);  /* final cleanup */
5313}
5314
5315
5316static void TestJitterbug255()
5317{
5318    static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5319    const char *testBuffer = (const char *)testBytes;
5320    const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5321    UErrorCode status = U_ZERO_ERROR;
5322    /*UChar32 result;*/
5323    UConverter *cnv = 0;
5324
5325    cnv = ucnv_open("shift-jis", &status);
5326    if (U_FAILURE(status) || cnv == 0) {
5327        log_data_err("Failed to open the converter for SJIS.\n");
5328                return;
5329    }
5330    while (testBuffer != testEnd)
5331    {
5332        /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5333        if (U_FAILURE(status))
5334        {
5335            log_err("Failed to convert the next UChar for SJIS.\n");
5336            break;
5337        }
5338    }
5339    ucnv_close(cnv);
5340}
5341
5342static void TestEBCDICUS4XML()
5343{
5344    UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5345    static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5346    static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5347    static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5348    char target_x[] = {0x00, 0x00, 0x00, 0x00};
5349    UChar *unicodes = unicodes_x;
5350    const UChar *toUnicodeMaps = toUnicodeMaps_x;
5351    char *target = target_x;
5352    const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5353    UErrorCode status = U_ZERO_ERROR;
5354    UConverter *cnv = 0;
5355
5356    cnv = ucnv_open("ebcdic-xml-us", &status);
5357    if (U_FAILURE(status) || cnv == 0) {
5358        log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5359        return;
5360    }
5361    ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5362    if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5363        log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5364            u_errorName(status));
5365        printUSeqErr(unicodes_x, 3);
5366        printUSeqErr(toUnicodeMaps, 3);
5367    }
5368    status = U_ZERO_ERROR;
5369    ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5370    if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5371        log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5372            u_errorName(status));
5373        printSeqErr((const unsigned char*)target_x, 3);
5374        printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5375    }
5376    ucnv_close(cnv);
5377}
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5379
5380#if !UCONFIG_NO_COLLATION
5381
5382static void TestJitterbug981(){
5383    const UChar* rules;
5384    int32_t rules_length, target_cap, bytes_needed, buff_size;
5385    UErrorCode status = U_ZERO_ERROR;
5386    UConverter *utf8cnv;
5387    UCollator* myCollator;
5388    char *buff;
5389    int numNeeded=0;
5390    utf8cnv = ucnv_open ("utf8", &status);
5391    if(U_FAILURE(status)){
5392        log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5393        return;
5394    }
5395    myCollator = ucol_open("zh", &status);
5396    if(U_FAILURE(status)){
5397        log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5398        ucnv_close(utf8cnv);
5399        return;
5400    }
5401
5402    rules = ucol_getRules(myCollator, &rules_length);
5403    if(rules_length == 0) {
5404        log_data_err("missing zh tailoring rule string\n");
5405        ucol_close(myCollator);
5406        ucnv_close(utf8cnv);
5407        return;
5408    }
5409    buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5410    buff = malloc(buff_size);
5411
5412    target_cap = 0;
5413    do {
5414        ucnv_reset(utf8cnv);
5415        status = U_ZERO_ERROR;
5416        if(target_cap >= buff_size) {
5417            log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5418            break;
5419        }
5420        bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5421            rules, rules_length, &status);
5422        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5423        if(numNeeded!=0 && numNeeded!= bytes_needed){
5424            log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5425            break;
5426        }
5427        numNeeded = bytes_needed;
5428    } while (status == U_BUFFER_OVERFLOW_ERROR);
5429    ucol_close(myCollator);
5430    ucnv_close(utf8cnv);
5431    free(buff);
5432}
5433
5434#endif
5435
5436#if !UCONFIG_NO_LEGACY_CONVERSION
5437static void TestJitterbug1293(){
5438    static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5439    char target[256];
5440    UErrorCode status = U_ZERO_ERROR;
5441    UConverter* conv=NULL;
5442    int32_t target_cap, bytes_needed, numNeeded = 0;
5443    conv = ucnv_open("shift-jis",&status);
5444    if(U_FAILURE(status)){
5445      log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5446      return;
5447    }
5448
5449    do{
5450        target_cap =0;
5451        bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5452        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5453        if(numNeeded!=0 && numNeeded!= bytes_needed){
5454          log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5455        }
5456        numNeeded = bytes_needed;
5457    } while (status == U_BUFFER_OVERFLOW_ERROR);
5458    if(U_FAILURE(status)){
5459      log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5460      return;
5461    }
5462    ucnv_close(conv);
5463}
5464#endif
5465
5466static void TestJB5275_1(){
5467
5468    static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5469                                "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5470                                /* Switch script: */
5471                                "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5472                                "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5473                                "\xEF\x40\x3B\xB3\x0A";
5474    static const UChar expected[] ={
5475            0x003b, 0x0a15, 0x000a, /* Easy characters */
5476            0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5477            0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5478            0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5479            0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5480    };
5481
5482    UErrorCode status = U_ZERO_ERROR;
5483    UConverter* conv = ucnv_open("iscii-gur", &status);
5484    UChar dest[100] = {'\0'};
5485    UChar* target = dest;
5486    UChar* targetLimit = dest+100;
5487    const char* source = data;
5488    const char* sourceLimit = data+strlen(data);
5489    const UChar* exp = expected;
5490
5491    if (U_FAILURE(status)) {
5492        log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5493        return;
5494    }
5495
5496    log_verbose("Testing switching back to default script when new line is encountered.\n");
5497    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5498    if(U_FAILURE(status)){
5499        log_err("conversion failed: %s \n", u_errorName(status));
5500    }
5501    targetLimit = target;
5502    target = dest;
5503    printUSeq(target, targetLimit-target);
5504    while(target<targetLimit){
5505        if(*exp!=*target){
5506            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5507        }
5508        target++;
5509        exp++;
5510    }
5511    ucnv_close(conv);
5512}
5513
5514static void TestJB5275(){
5515    static const char* data =
5516    /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5517    /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5518    /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5519        "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5520        "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5521        "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5522        "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5523        "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5524        "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5525        /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5526    static const UChar expected[] ={
5527        0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5528        0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5529        0x0038, 0x0C95, 0x000A, /* Kannada test */
5530        0x0039, 0x0D15, 0x000A, /* Malayalam test */
5531        0x003A, 0x0A95, 0x000A, /* Gujarati test */
5532        0x003B, 0x0A15, 0x000A, /* Punjabi test */
5533    };
5534
5535    UErrorCode status = U_ZERO_ERROR;
5536    UConverter* conv = ucnv_open("iscii", &status);
5537    UChar dest[100] = {'\0'};
5538    UChar* target = dest;
5539    UChar* targetLimit = dest+100;
5540    const char* source = data;
5541    const char* sourceLimit = data+strlen(data);
5542    const UChar* exp = expected;
5543    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5544    if(U_FAILURE(status)){
5545        log_data_err("conversion failed: %s \n", u_errorName(status));
5546    }
5547    targetLimit = target;
5548    target = dest;
5549
5550    printUSeq(target, targetLimit-target);
5551
5552    while(target<targetLimit){
5553        if(*exp!=*target){
5554            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5555        }
5556        target++;
5557        exp++;
5558    }
5559    ucnv_close(conv);
5560}
5561
5562static void
5563TestIsFixedWidth() {
5564    UErrorCode status = U_ZERO_ERROR;
5565    UConverter *cnv = NULL;
5566    int32_t i;
5567
5568    const char *fixedWidth[] = {
5569            "US-ASCII",
5570            "UTF32",
5571            "ibm-5478_P100-1995"
5572    };
5573
5574    const char *notFixedWidth[] = {
5575            "GB18030",
5576            "UTF8",
5577            "windows-949-2000",
5578            "UTF16"
5579    };
5580
5581    for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5582        cnv = ucnv_open(fixedWidth[i], &status);
5583        if (cnv == NULL || U_FAILURE(status)) {
5584            log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5585            continue;
5586        }
5587
5588        if (!ucnv_isFixedWidth(cnv, &status)) {
5589            log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5590        }
5591        ucnv_close(cnv);
5592    }
5593
5594    for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5595        cnv = ucnv_open(notFixedWidth[i], &status);
5596        if (cnv == NULL || U_FAILURE(status)) {
5597            log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5598            continue;
5599        }
5600
5601        if (ucnv_isFixedWidth(cnv, &status)) {
5602            log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5603        }
5604        ucnv_close(cnv);
5605    }
5606}
5607