1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File CCONVTST.C
9*
10* Modification History:
11*        Name                     Description
12*    Steven R. Loomis     7/8/1999      Adding input buffer test
13********************************************************************************
14*/
15#include <stdio.h>
16#include "cstring.h"
17#include "unicode/uloc.h"
18#include "unicode/ucnv.h"
19#include "unicode/ucnv_err.h"
20#include "unicode/ucnv_cb.h"
21#include "cintltst.h"
22#include "unicode/utypes.h"
23#include "unicode/ustring.h"
24#include "unicode/ucol.h"
25#include "unicode/utf16.h"
26#include "cmemory.h"
27#include "nucnvtst.h"
28
/* Element count of a true array object (not valid for pointers). */
#define LENGTHOF(array) (sizeof(array)/sizeof(*(array)))
30
31static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
32static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
33#if !UCONFIG_NO_COLLATION
34static void TestJitterbug981(void);
35#endif
36#if !UCONFIG_NO_LEGACY_CONVERSION
37static void TestJitterbug1293(void);
38#endif
39static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
40static void TestConverterTypesAndStarters(void);
41static void TestAmbiguous(void);
42static void TestSignatureDetection(void);
43static void TestUTF7(void);
44static void TestIMAP(void);
45static void TestUTF8(void);
46static void TestCESU8(void);
47static void TestUTF16(void);
48static void TestUTF16BE(void);
49static void TestUTF16LE(void);
50static void TestUTF32(void);
51static void TestUTF32BE(void);
52static void TestUTF32LE(void);
53static void TestLATIN1(void);
54
55#if !UCONFIG_NO_LEGACY_CONVERSION
56static void TestSBCS(void);
57static void TestDBCS(void);
58static void TestMBCS(void);
59#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60static void TestICCRunout(void);
61#endif
62
63#ifdef U_ENABLE_GENERIC_ISO_2022
64static void TestISO_2022(void);
65#endif
66
67static void TestISO_2022_JP(void);
68static void TestISO_2022_JP_1(void);
69static void TestISO_2022_JP_2(void);
70static void TestISO_2022_KR(void);
71static void TestISO_2022_KR_1(void);
72static void TestISO_2022_CN(void);
73#if 0
74   /*
75    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
76    */
77static void TestISO_2022_CN_EXT(void);
78#endif
79static void TestJIS(void);
80static void TestHZ(void);
81#endif
82
83static void TestSCSU(void);
84
85#if !UCONFIG_NO_LEGACY_CONVERSION
86static void TestEBCDIC_STATEFUL(void);
87static void TestGB18030(void);
88static void TestLMBCS(void);
89static void TestJitterbug255(void);
90static void TestEBCDICUS4XML(void);
91#if 0
92   /*
93    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
94    */
95static void TestJitterbug915(void);
96#endif
97static void TestISCII(void);
98
99static void TestCoverageMBCS(void);
100static void TestJitterbug2346(void);
101static void TestJitterbug2411(void);
102static void TestJB5275(void);
103static void TestJB5275_1(void);
104static void TestJitterbug6175(void);
105
106static void TestIsFixedWidth(void);
107#endif
108
109static void TestInBufSizes(void);
110
111static void TestRoundTrippingAllUTF(void);
112static void TestConv(const uint16_t in[],
113                     int len,
114                     const char* conv,
115                     const char* lang,
116                     char byteArr[],
117                     int byteArrLen);
118
119/* open a converter, using test data if it begins with '@' */
120static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
121
122
/* Capacity, in elements, of the scratch output buffers used by the
   testConvertFromU()/testConvertToU() helpers. */
#define NEW_MAX_BUFFER 999

/* Chunk sizes handed to the converter per call.  They are overwritten by
   TestNewConvertWithBufferSizes() and read by the conversion helpers. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Description of the conversion currently under test, used in log messages. */
static char     gNuConvTestName[1024];

/* Smaller of x and y.  Arguments are fully parenthesized so that operands
   with low-precedence operators (e.g. `a|b`) compare as written; each
   argument may still be evaluated twice, so avoid side effects. */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
130
131static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
132{
133  if(cnv && cnv[0] == '@') {
134    return ucnv_openPackage(loadTestData(err), cnv+1, err);
135  } else {
136    return ucnv_open(cnv, err);
137  }
138}
139
/* Write `len` bytes of `a` to the verbose log as "{0xNN 0xNN ... }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148
149static void printUSeq(const UChar* a, int len)
150{
151    int i=0;
152    log_verbose("{U+");
153    while (i<len) log_verbose("0x%04x ", a[i++]);
154    log_verbose("}\n");
155}
156
/* Same output as printSeq(), but written directly to stderr. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165
166static void printUSeqErr(const UChar* a, int len)
167{
168    int i=0;
169    fprintf(stderr, "{U+");
170    while (i<len)
171        fprintf(stderr, "0x%04x ", a[i++]);
172    fprintf(stderr,"}\n");
173}
174
175static void
176TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
177{
178     const char* s0;
179     const char* s=(char*)source;
180     const int32_t *r=results;
181     UErrorCode errorCode=U_ZERO_ERROR;
182     UChar32 c;
183
184     while(s<limit) {
185        s0=s;
186        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
188            break; /* no more significant input */
189        } else if(U_FAILURE(errorCode)) {
190            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
191            break;
192        } else if(
193            /* test the expected number of input bytes only if >=0 */
194            (*r>=0 && (int32_t)(s-s0)!=*r) ||
195            c!=*(r+1)
196        ) {
197            log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198                message, c, (s-s0), *(r+1), *r);
199            break;
200        }
201        r+=2;
202    }
203}
204
205static void
206TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
207{
208     const char* s=(char*)source;
209     UErrorCode errorCode=U_ZERO_ERROR;
210     uint32_t c;
211     c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
212     if(errorCode != expected){
213        log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
214     }
215     if(c != 0xFFFD && c != 0xffff){
216        log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
217     }
218
219}
220
221static void TestInBufSizes(void)
222{
223  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
224#if 1
225  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
226  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
227  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
228  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
229  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
230  TestNewConvertWithBufferSizes(1,1);
231  TestNewConvertWithBufferSizes(2,3);
232  TestNewConvertWithBufferSizes(3,2);
233#endif
234}
235
236static void TestOutBufSizes(void)
237{
238#if 1
239  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
240  TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
241  TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
242  TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
243  TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
244  TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
245
246#endif
247}
248
249
250void addTestNewConvert(TestNode** root)
251{
252#if !UCONFIG_NO_FILE_IO
253   addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
254   addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
255#endif
256   addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
257   addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
258   addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
259   addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
260   addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
261   addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
262
263   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
264   addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
265   addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
266   addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
267   addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
268   addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
269   addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
270   addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
271
272#if !UCONFIG_NO_LEGACY_CONVERSION
273   addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
274#endif
275
276   addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
277
278#if !UCONFIG_NO_LEGACY_CONVERSION
279   addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
280#if !UCONFIG_NO_FILE_IO
281   addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
282   addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
283#endif
284   addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
285
286#ifdef U_ENABLE_GENERIC_ISO_2022
287   addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
288#endif
289   addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
290   addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
291   addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
292   /* BEGIN android-changed: we don't have ISO_2022_JP_2
293   addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
294   END android-changed */
295   addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
296   addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
297   /* BEGIN android-changed: we don't have ISO-2022-CN.
298   addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
299   END android-changed */
300   /*
301    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
302   addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
303   addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
304    */
305   addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
306#endif
307
308   addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
309
310#if !UCONFIG_NO_LEGACY_CONVERSION
311   addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
312   addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
313   addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
314   addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
315   addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
316   addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
317   addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
318#if !UCONFIG_NO_COLLATION
319   /* BEGIN android-removed
320      To save space, Android does not include the collation tailoring rules.
321      Skip the related tests.
322   addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
323      END android-removed */
324#endif
325
326   addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
327#endif
328
329
330#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
331   addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
332#endif
333
334   addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
335
336#if !UCONFIG_NO_LEGACY_CONVERSION
337   addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
338   addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
339   /* BEGIN android-removed
340      To save space, Android does not build full ISO2022 CJK tables.
341      We turn off the tests here.
342   addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
343      END android-removed */
344
345   addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
346#endif
347}
348
349
350/* Note that this test already makes use of statics, so it's not really
351   multithread safe.
352   This convenience function lets us make the error messages actually useful.
353*/
354
355static void setNuConvTestName(const char *codepage, const char *direction)
356{
357    sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
358        codepage,
359        direction,
360        (int)gInBufferSize,
361        (int)gOutBufferSize);
362}
363
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
  /* 0: test was OK */
  TC_OK,
  /* 1: match failed - err was printed */
  TC_MISMATCH,
  /* 2: test failed, don't print an err because it was already printed */
  TC_FAIL
} ETestConvertResult;
370
371/* Note: This function uses global variables and it will not do offset
372checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
373static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
374                const char *codepage, const int32_t *expectOffsets , UBool useFallback)
375{
376    UErrorCode status = U_ZERO_ERROR;
377    UConverter *conv = 0;
378    char    junkout[NEW_MAX_BUFFER]; /* FIX */
379    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
380    char *p;
381    const UChar *src;
382    char *end;
383    char *targ;
384    int32_t *offs;
385    int i;
386    int32_t   realBufferSize;
387    char *realBufferEnd;
388    const UChar *realSourceEnd;
389    const UChar *sourceLimit;
390    UBool checkOffsets = TRUE;
391    UBool doFlush;
392
393    for(i=0;i<NEW_MAX_BUFFER;i++)
394        junkout[i] = (char)0xF0;
395    for(i=0;i<NEW_MAX_BUFFER;i++)
396        junokout[i] = 0xFF;
397
398    setNuConvTestName(codepage, "FROM");
399
400    log_verbose("\n=========  %s\n", gNuConvTestName);
401
402    conv = my_ucnv_open(codepage, &status);
403
404    if(U_FAILURE(status))
405    {
406        log_data_err("Couldn't open converter %s\n",codepage);
407        return TC_FAIL;
408    }
409    if(useFallback){
410        ucnv_setFallback(conv,useFallback);
411    }
412
413    log_verbose("Converter opened..\n");
414
415    src = source;
416    targ = junkout;
417    offs = junokout;
418
419    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
420    realBufferEnd = junkout + realBufferSize;
421    realSourceEnd = source + sourceLen;
422
423    if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
424        checkOffsets = FALSE;
425
426    do
427    {
428      end = nct_min(targ + gOutBufferSize, realBufferEnd);
429      sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
430
431      doFlush = (UBool)(sourceLimit == realSourceEnd);
432
433      if(targ == realBufferEnd) {
434        log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
435        return TC_FAIL;
436      }
437      log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
438
439
440      status = U_ZERO_ERROR;
441
442      ucnv_fromUnicode (conv,
443                        &targ,
444                        end,
445                        &src,
446                        sourceLimit,
447                        checkOffsets ? offs : NULL,
448                        doFlush, /* flush if we're at the end of the input data */
449                        &status);
450    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
451
452    if(U_FAILURE(status)) {
453      log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
454      return TC_FAIL;
455    }
456
457    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
458                sourceLen, targ-junkout);
459
460    if(getTestOption(VERBOSITY_OPTION))
461    {
462      char junk[9999];
463      char offset_str[9999];
464      char *ptr;
465
466      junk[0] = 0;
467      offset_str[0] = 0;
468      for(ptr = junkout;ptr<targ;ptr++) {
469        sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
470        sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
471      }
472
473      log_verbose(junk);
474      printSeq((const uint8_t *)expect, expectLen);
475      if ( checkOffsets ) {
476        log_verbose("\nOffsets:");
477        log_verbose(offset_str);
478      }
479      log_verbose("\n");
480    }
481    ucnv_close(conv);
482
483    if(expectLen != targ-junkout) {
484      log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
485      log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
486      fprintf(stderr, "Got:\n");
487      printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
488      fprintf(stderr, "Expected:\n");
489      printSeqErr((const unsigned char*)expect, expectLen);
490      return TC_MISMATCH;
491    }
492
493    if (checkOffsets && (expectOffsets != 0) ) {
494      log_verbose("comparing %d offsets..\n", targ-junkout);
495      if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
496        log_err("did not get the expected offsets. %s\n", gNuConvTestName);
497        printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
498        log_err("\n");
499        log_err("Got  :     ");
500        for(p=junkout;p<targ;p++) {
501          log_err("%d,", junokout[p-junkout]);
502        }
503        log_err("\n");
504        log_err("Expected:  ");
505        for(i=0; i<(targ-junkout); i++) {
506          log_err("%d,", expectOffsets[i]);
507        }
508        log_err("\n");
509      }
510    }
511
512    log_verbose("comparing..\n");
513    if(!memcmp(junkout, expect, expectLen)) {
514      log_verbose("Matches!\n");
515      return TC_OK;
516    } else {
517      log_err("String does not match u->%s\n", gNuConvTestName);
518      printUSeqErr(source, sourceLen);
519      fprintf(stderr, "Got:\n");
520      printSeqErr((const unsigned char *)junkout, expectLen);
521      fprintf(stderr, "Expected:\n");
522      printSeqErr((const unsigned char *)expect, expectLen);
523
524      return TC_MISMATCH;
525    }
526}
527
528/* Note: This function uses global variables and it will not do offset
529checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
530static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
531                                          const char *codepage, const int32_t *expectOffsets, UBool useFallback)
532{
533    UErrorCode status = U_ZERO_ERROR;
534    UConverter *conv = 0;
535    UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
536    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
537    const char *src;
538    const char *realSourceEnd;
539    const char *srcLimit;
540    UChar *p;
541    UChar *targ;
542    UChar *end;
543    int32_t *offs;
544    int i;
545    UBool   checkOffsets = TRUE;
546
547    int32_t   realBufferSize;
548    UChar *realBufferEnd;
549
550
551    for(i=0;i<NEW_MAX_BUFFER;i++)
552        junkout[i] = 0xFFFE;
553
554    for(i=0;i<NEW_MAX_BUFFER;i++)
555        junokout[i] = -1;
556
557    setNuConvTestName(codepage, "TO");
558
559    log_verbose("\n=========  %s\n", gNuConvTestName);
560
561    conv = my_ucnv_open(codepage, &status);
562
563    if(U_FAILURE(status))
564    {
565        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
566        return TC_FAIL;
567    }
568    if(useFallback){
569        ucnv_setFallback(conv,useFallback);
570    }
571    log_verbose("Converter opened..\n");
572
573    src = (const char *)source;
574    targ = junkout;
575    offs = junokout;
576
577    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
578    realBufferEnd = junkout + realBufferSize;
579    realSourceEnd = src + sourcelen;
580
581    if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
582        checkOffsets = FALSE;
583
584    do
585    {
586        end = nct_min( targ + gOutBufferSize, realBufferEnd);
587        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
588
589        if(targ == realBufferEnd)
590        {
591            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
592            return TC_FAIL;
593        }
594        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
595
596        /* oldTarg = targ; */
597
598        status = U_ZERO_ERROR;
599
600        ucnv_toUnicode (conv,
601                &targ,
602                end,
603                &src,
604                srcLimit,
605                checkOffsets ? offs : NULL,
606                (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
607                &status);
608
609        /*        offs += (targ-oldTarg); */
610
611      } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
612
613    if(U_FAILURE(status))
614    {
615        log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
616        return TC_FAIL;
617    }
618
619    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
620        sourcelen, targ-junkout);
621    if(getTestOption(VERBOSITY_OPTION))
622    {
623        char junk[9999];
624        char offset_str[9999];
625        UChar *ptr;
626
627        junk[0] = 0;
628        offset_str[0] = 0;
629
630        for(ptr = junkout;ptr<targ;ptr++)
631        {
632            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
633            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
634        }
635
636        log_verbose(junk);
637        printUSeq(expect, expectlen);
638        if ( checkOffsets )
639          {
640            log_verbose("\nOffsets:");
641            log_verbose(offset_str);
642          }
643        log_verbose("\n");
644    }
645    ucnv_close(conv);
646
647    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
648
649    if (checkOffsets && (expectOffsets != 0))
650    {
651        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
652            log_err("did not get the expected offsets. %s\n",gNuConvTestName);
653            log_err("Got:      ");
654            for(p=junkout;p<targ;p++) {
655                log_err("%d,", junokout[p-junkout]);
656            }
657            log_err("\n");
658            log_err("Expected: ");
659            for(i=0; i<(targ-junkout); i++) {
660                log_err("%d,", expectOffsets[i]);
661            }
662            log_err("\n");
663            log_err("output:   ");
664            for(i=0; i<(targ-junkout); i++) {
665                log_err("%X,", junkout[i]);
666            }
667            log_err("\n");
668            log_err("input:    ");
669            for(i=0; i<(src-(const char *)source); i++) {
670                log_err("%X,", (unsigned char)source[i]);
671            }
672            log_err("\n");
673        }
674    }
675
676    if(!memcmp(junkout, expect, expectlen*2))
677    {
678        log_verbose("Matches!\n");
679        return TC_OK;
680    }
681    else
682    {
683        log_err("String does not match. %s\n", gNuConvTestName);
684        log_verbose("String does not match. %s\n", gNuConvTestName);
685        printf("\nGot:");
686        printUSeqErr(junkout, expectlen);
687        printf("\nExpected:");
688        printUSeqErr(expect, expectlen);
689        return TC_MISMATCH;
690    }
691}
692
693
694static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
695{
696/** test chars #1 */
697    /*  1 2 3  1Han 2Han 3Han .  */
698    static const UChar   sampleText[] =
699     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
700    static const UChar sampleTextRoundTripUnmappable[] =
701    { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
702
703
704    static const uint8_t expectedUTF8[] =
705     { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
706    static const int32_t toUTF8Offs[] =
707     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
708    static const int32_t fmUTF8Offs[] =
709     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
710
711#ifdef U_ENABLE_GENERIC_ISO_2022
712    /* Same as UTF8, but with ^[%B preceeding */
713    static const const uint8_t expectedISO2022[] =
714     { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
715    static const int32_t toISO2022Offs[]     =
716     { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
717       0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
718    static const int32_t fmISO2022Offs[] =
719     { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
720#endif
721
722    /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
723    static const uint8_t expectedIBM930[] =
724     { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
725    static const int32_t toIBM930Offs[] =
726     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
727    static const int32_t fmIBM930Offs[] =
728     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
729
730    /* 1 2 3 0 h1 h2 h3 . MBCS*/
731    static const uint8_t expectedIBM943[] =
732     {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
733    static const int32_t toIBM943Offs    [] =
734     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
735    static const int32_t fmIBM943Offs[] =
736     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
737
738    /* 1 2 3 0 h1 h2 h3 . DBCS*/
739    static const uint8_t expectedIBM9027[] =
740     {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
741    static const int32_t toIBM9027Offs    [] =
742     {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
743
744     /* 1 2 3 0 <?> <?> <?> . SBCS*/
745    static const uint8_t expectedIBM920[] =
746     {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
747    static const int32_t toIBM920Offs    [] =
748     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
749
750    /* 1 2 3 0 <?> <?> <?> . SBCS*/
751    static const uint8_t expectedISO88593[] =
752     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
753    static const int32_t toISO88593Offs[]     =
754     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
755
756    /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
757    static const uint8_t expectedLATIN1[] =
758     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
759    static const int32_t toLATIN1Offs[]     =
760     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
761
762
763    /*  etc */
764    static const uint8_t expectedUTF16BE[] =
765     { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
766    static const int32_t toUTF16BEOffs[]=
767     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
768    static const int32_t fmUTF16BEOffs[] =
769     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
770
771    static const uint8_t expectedUTF16LE[] =
772     { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
773    static const int32_t toUTF16LEOffs[]=
774     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
775    static const int32_t fmUTF16LEOffs[] =
776     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
777
778    static const uint8_t expectedUTF32BE[] =
779     { 0x00, 0x00, 0x00, 0x31,
780       0x00, 0x00, 0x00, 0x32,
781       0x00, 0x00, 0x00, 0x33,
782       0x00, 0x00, 0x00, 0x00,
783       0x00, 0x00, 0x4e, 0x00,
784       0x00, 0x00, 0x4e, 0x8c,
785       0x00, 0x00, 0x4e, 0x09,
786       0x00, 0x00, 0x00, 0x2e,
787       0x00, 0x02, 0x00, 0x21 };
788    static const int32_t toUTF32BEOffs[]=
789     { 0x00, 0x00, 0x00, 0x00,
790       0x01, 0x01, 0x01, 0x01,
791       0x02, 0x02, 0x02, 0x02,
792       0x03, 0x03, 0x03, 0x03,
793       0x04, 0x04, 0x04, 0x04,
794       0x05, 0x05, 0x05, 0x05,
795       0x06, 0x06, 0x06, 0x06,
796       0x07, 0x07, 0x07, 0x07,
797       0x08, 0x08, 0x08, 0x08,
798       0x08, 0x08, 0x08, 0x08 };
799    static const int32_t fmUTF32BEOffs[] =
800     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
801
802    static const uint8_t expectedUTF32LE[] =
803     { 0x31, 0x00, 0x00, 0x00,
804       0x32, 0x00, 0x00, 0x00,
805       0x33, 0x00, 0x00, 0x00,
806       0x00, 0x00, 0x00, 0x00,
807       0x00, 0x4e, 0x00, 0x00,
808       0x8c, 0x4e, 0x00, 0x00,
809       0x09, 0x4e, 0x00, 0x00,
810       0x2e, 0x00, 0x00, 0x00,
811       0x21, 0x00, 0x02, 0x00 };
812    static const int32_t toUTF32LEOffs[]=
813     { 0x00, 0x00, 0x00, 0x00,
814       0x01, 0x01, 0x01, 0x01,
815       0x02, 0x02, 0x02, 0x02,
816       0x03, 0x03, 0x03, 0x03,
817       0x04, 0x04, 0x04, 0x04,
818       0x05, 0x05, 0x05, 0x05,
819       0x06, 0x06, 0x06, 0x06,
820       0x07, 0x07, 0x07, 0x07,
821       0x08, 0x08, 0x08, 0x08,
822       0x08, 0x08, 0x08, 0x08 };
823    static const int32_t fmUTF32LEOffs[] =
824     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
825
826
827
828
829/** Test chars #2 **/
830
831    /* Sahha [health],  slashed h's */
832    static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
833    static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
834
835    /* LMBCS */
836    static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
837    static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
838    static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
839    static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
840    /*********************************** START OF CODE finally *************/
841
842    gInBufferSize = insize;
843    gOutBufferSize = outsize;
844
845    log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
846
847
848    /*UTF-8*/
849    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
850        expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
851
852    log_verbose("Test surrogate behaviour for UTF8\n");
853    {
854        static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
855        static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
856                           0xf0, 0x90, 0x90, 0x81,
857                           0xef, 0xbf, 0xbd
858        };
859        static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
860        testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
861                         expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
862
863
864    }
865
866#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
867    /*ISO-2022*/
868    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
869        expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
870#endif
871
872    /*UTF16 LE*/
873    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
874        expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
875    /*UTF16 BE*/
876    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
877        expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
878    /*UTF32 LE*/
879    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
880        expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
881    /*UTF32 BE*/
882    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
883        expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
884
885    /*LATIN_1*/
886    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
887        expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
888
889#if !UCONFIG_NO_LEGACY_CONVERSION
890    /*EBCDIC_STATEFUL*/
891    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
892        expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
893
894    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
895        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
896
897    /*MBCS*/
898
899    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
900        expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
901    /*DBCS*/
902    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
903        expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
904    /*SBCS*/
905    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
906        expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
907    /*SBCS*/
908    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
909        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
910#endif
911
912
913/****/
914
915    /*UTF-8*/
916    testConvertToU(expectedUTF8, sizeof(expectedUTF8),
917        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
918#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
919    /*ISO-2022*/
920    testConvertToU(expectedISO2022, sizeof(expectedISO2022),
921        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
922#endif
923
924    /*UTF16 LE*/
925    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
926        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
927    /*UTF16 BE*/
928    testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
929        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
930    /*UTF32 LE*/
931    testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
932        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
933    /*UTF32 BE*/
934    testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
935        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
936
937#if !UCONFIG_NO_LEGACY_CONVERSION
938    /*EBCDIC_STATEFUL*/
939    testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
940            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE);
941    /*MBCS*/
942    testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
943            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE);
944#endif
945
946    /* Try it again to make sure it still works */
947    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
948        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
949
950#if !UCONFIG_NO_LEGACY_CONVERSION
951    testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
952        malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
953
954    testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
955        expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
956
957    /*LMBCS*/
958    testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
959        expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
960    testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
961        LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
962#endif
963
964    /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
965    {
966        /* encode directly set D and set O */
967        static const uint8_t utf7[] = {
968            /*
969                Hi Mom -+Jjo--!
970                A+ImIDkQ.
971                +-
972                +ZeVnLIqe-
973            */
974            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
975            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
976            0x2b, 0x2d,
977            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
978        };
979        static const UChar unicode[] = {
980            /*
981                Hi Mom -<WHITE SMILING FACE>-!
982                A<NOT IDENTICAL TO><ALPHA>.
983                +
984                [Japanese word "nihongo"]
985            */
986            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
987            0x41, 0x2262, 0x0391, 0x2e,
988            0x2b,
989            0x65e5, 0x672c, 0x8a9e
990        };
991        static const int32_t toUnicodeOffsets[] = {
992            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
993            15, 17, 19, 23,
994            24,
995            27, 29, 32
996        };
997        static const int32_t fromUnicodeOffsets[] = {
998            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
999            11, 12, 12, 12, 13, 13, 13, 13, 14,
1000            15, 15,
1001            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1002        };
1003
1004        /* same but escaping set O (the exclamation mark) */
1005        static const uint8_t utf7Restricted[] = {
1006            /*
1007                Hi Mom -+Jjo--+ACE-
1008                A+ImIDkQ.
1009                +-
1010                +ZeVnLIqe-
1011            */
1012            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1013            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1014            0x2b, 0x2d,
1015            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1016        };
1017        static const int32_t toUnicodeOffsetsR[] = {
1018            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1019            19, 21, 23, 27,
1020            28,
1021            31, 33, 36
1022        };
1023        static const int32_t fromUnicodeOffsetsR[] = {
1024            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1025            11, 12, 12, 12, 13, 13, 13, 13, 14,
1026            15, 15,
1027            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1028        };
1029
1030        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1031
1032        testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
1033
1034        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1035
1036        testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1037    }
1038
1039    /*
1040     * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1041     * modified according to RFC 2060,
1042     * and supplemented with the one example in RFC 2060 itself.
1043     */
1044    {
1045        static const uint8_t imap[] = {
1046            /*  Hi Mom -&Jjo--!
1047                A&ImIDkQ-.
1048                &-
1049                &ZeVnLIqe-
1050                \
1051                ~peter
1052                /mail
1053                /&ZeVnLIqe-
1054                /&U,BTFw-
1055            */
1056            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1057            0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1058            0x26, 0x2d,
1059            0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1060            0x5c,
1061            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1062            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1063            0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1064            0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1065        };
1066        static const UChar unicode[] = {
1067            /*  Hi Mom -<WHITE SMILING FACE>-!
1068                A<NOT IDENTICAL TO><ALPHA>.
1069                &
1070                [Japanese word "nihongo"]
1071                \
1072                ~peter
1073                /mail
1074                /<65e5, 672c, 8a9e>
1075                /<53f0, 5317>
1076            */
1077            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1078            0x41, 0x2262, 0x0391, 0x2e,
1079            0x26,
1080            0x65e5, 0x672c, 0x8a9e,
1081            0x5c,
1082            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1083            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1084            0x2f, 0x65e5, 0x672c, 0x8a9e,
1085            0x2f, 0x53f0, 0x5317
1086        };
1087        static const int32_t toUnicodeOffsets[] = {
1088            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1089            15, 17, 19, 24,
1090            25,
1091            28, 30, 33,
1092            37,
1093            38, 39, 40, 41, 42, 43,
1094            44, 45, 46, 47, 48,
1095            49, 51, 53, 56,
1096            60, 62, 64
1097        };
1098        static const int32_t fromUnicodeOffsets[] = {
1099            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1100            11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1101            15, 15,
1102            16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1103            19,
1104            20, 21, 22, 23, 24, 25,
1105            26, 27, 28, 29, 30,
1106            31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1107            35, 36, 36, 36, 37, 37, 37, 37, 37
1108        };
1109
1110        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1111
1112        testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1113    }
1114
1115    /* Test UTF-8 bad data handling*/
1116    {
1117        static const uint8_t utf8[]={
1118            0x61,
1119            0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1120            0x00,
1121            0x62,
1122            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1123            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1124            0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1125            0xdf, 0xbf,                     /* 7ff */
1126            0xbf,                           /* truncated tail */
1127            0xf4, 0x90, 0x80, 0x80,         /* 11FFFF */
1128            0x02
1129        };
1130
1131        static const uint16_t utf8Expected[]={
1132            0x0061,
1133            0xfffd,
1134            0x0000,
1135            0x0062,
1136            0xfffd,
1137            0xfffd,
1138            0xdbff, 0xdfff,
1139            0x07ff,
1140            0xfffd,
1141            0xfffd,
1142            0x0002
1143        };
1144
1145        static const int32_t utf8Offsets[]={
1146            0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1147        };
1148        testConvertToU(utf8, sizeof(utf8),
1149                       utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1150
1151    }
1152
1153    /* Test UTF-32BE bad data handling*/
1154    {
1155        static const uint8_t utf32[]={
1156            0x00, 0x00, 0x00, 0x61,
1157            0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1158            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1159            0x00, 0x00, 0x00, 0x62,
1160            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1161            0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1162            0x00, 0x00, 0x01, 0x62,
1163            0x00, 0x00, 0x02, 0x62
1164        };
1165        static const uint16_t utf32Expected[]={
1166            0x0061,
1167            0xfffd,         /* 0x110000 out of range */
1168            0xDBFF,         /* 0x10FFFF in range */
1169            0xDFFF,
1170            0x0062,
1171            0xfffd,         /* 0xffffffff out of range */
1172            0xfffd,         /* 0x7fffffff out of range */
1173            0x0162,
1174            0x0262
1175        };
1176        static const int32_t utf32Offsets[]={
1177            0, 4, 8, 8, 12, 16, 20, 24, 28
1178        };
1179        static const uint8_t utf32ExpectedBack[]={
1180            0x00, 0x00, 0x00, 0x61,
1181            0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1182            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1183            0x00, 0x00, 0x00, 0x62,
1184            0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1185            0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1186            0x00, 0x00, 0x01, 0x62,
1187            0x00, 0x00, 0x02, 0x62
1188        };
1189        static const int32_t utf32OffsetsBack[]={
1190            0,0,0,0,
1191            1,1,1,1,
1192            2,2,2,2,
1193            4,4,4,4,
1194            5,5,5,5,
1195            6,6,6,6,
1196            7,7,7,7,
1197            8,8,8,8
1198        };
1199
1200        testConvertToU(utf32, sizeof(utf32),
1201                       utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1202        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1203            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1204    }
1205
1206    /* Test UTF-32LE bad data handling*/
1207    {
1208        static const uint8_t utf32[]={
1209            0x61, 0x00, 0x00, 0x00,
1210            0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1211            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1212            0x62, 0x00, 0x00, 0x00,
1213            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1214            0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1215            0x62, 0x01, 0x00, 0x00,
1216            0x62, 0x02, 0x00, 0x00,
1217        };
1218
1219        static const uint16_t utf32Expected[]={
1220            0x0061,
1221            0xfffd,         /* 0x110000 out of range */
1222            0xDBFF,         /* 0x10FFFF in range */
1223            0xDFFF,
1224            0x0062,
1225            0xfffd,         /* 0xffffffff out of range */
1226            0xfffd,         /* 0x7fffffff out of range */
1227            0x0162,
1228            0x0262
1229        };
1230        static const int32_t utf32Offsets[]={
1231            0, 4, 8, 8, 12, 16, 20, 24, 28
1232        };
1233        static const uint8_t utf32ExpectedBack[]={
1234            0x61, 0x00, 0x00, 0x00,
1235            0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1236            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1237            0x62, 0x00, 0x00, 0x00,
1238            0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1239            0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1240            0x62, 0x01, 0x00, 0x00,
1241            0x62, 0x02, 0x00, 0x00
1242        };
1243        static const int32_t utf32OffsetsBack[]={
1244            0,0,0,0,
1245            1,1,1,1,
1246            2,2,2,2,
1247            4,4,4,4,
1248            5,5,5,5,
1249            6,6,6,6,
1250            7,7,7,7,
1251            8,8,8,8
1252        };
1253        testConvertToU(utf32, sizeof(utf32),
1254            utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1255        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1256            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1257    }
1258}
1259
1260static void TestCoverageMBCS(){
1261#if 0
1262    UErrorCode status = U_ZERO_ERROR;
1263    const char *directory = loadTestData(&status);
1264    char* tdpath = NULL;
1265    char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1266    int len = strlen(directory);
1267    char* index=NULL;
1268
1269    tdpath = (char*) malloc(sizeof(char) * (len * 2));
1270    uprv_strcpy(saveDirectory,u_getDataDirectory());
1271    log_verbose("Retrieved data directory %s \n",saveDirectory);
1272    uprv_strcpy(tdpath,directory);
1273    index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1274
1275    if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1276            *(index+1)=0;
1277    }
1278    u_setDataDirectory(tdpath);
1279    log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1280#endif
1281
1282    /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1283      which is test file for MBCS conversion with single-byte codepage data.*/
1284    {
1285
1286        /* MBCS with single byte codepage data test1.ucm*/
1287        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1288        const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1289        int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1290
1291        /*from Unicode*/
1292        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1293            expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1294    }
1295
1296    /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1297      which is test file for MBCS conversion with three-byte codepage data.*/
1298    {
1299
1300        /* MBCS with three byte codepage data test3.ucm*/
1301        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1302        const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1303        int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1304
1305        const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1306        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1307        int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1308
1309        /*from Unicode*/
1310        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1311            expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1312
1313        /*to Unicode*/
1314        testConvertToU(test3input, sizeof(test3input),
1315            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1316
1317    }
1318
1319    /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1320      which is test file for MBCS conversion with four-byte codepage data.*/
1321    {
1322
1323        /* MBCS with three byte codepage data test4.ucm*/
1324        static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1325        static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1326        static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1327
1328        static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1329        static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1330        static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1331
1332        /*from Unicode*/
1333        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1334            expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1335
1336        /*to Unicode*/
1337        testConvertToU(test4input, sizeof(test4input),
1338            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1339
1340    }
1341#if 0
1342    free(tdpath);
1343    /* restore the original data directory */
1344    log_verbose("Setting the data directory to %s \n", saveDirectory);
1345    u_setDataDirectory(saveDirectory);
1346    free(saveDirectory);
1347#endif
1348
1349}
1350
1351static void TestConverterType(const char *convName, UConverterType convType) {
1352    UConverter* myConverter;
1353    UErrorCode err = U_ZERO_ERROR;
1354
1355    myConverter = my_ucnv_open(convName, &err);
1356
1357    if (U_FAILURE(err)) {
1358        log_data_err("Failed to create an %s converter\n", convName);
1359        return;
1360    }
1361    else
1362    {
1363        if (ucnv_getType(myConverter)!=convType) {
1364            log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1365                convName, convType);
1366        }
1367        else {
1368            log_verbose("ucnv_getType %s ok\n", convName);
1369        }
1370    }
1371    ucnv_close(myConverter);
1372}
1373
1374static void TestConverterTypesAndStarters()
1375{
1376#if !UCONFIG_NO_LEGACY_CONVERSION
1377    UConverter* myConverter;
1378    UErrorCode err = U_ZERO_ERROR;
1379    UBool mystarters[256];
1380
1381/*    const UBool expectedKSCstarters[256] = {
1382        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1386        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1387        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1388        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1389        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1390        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1391        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1392        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1393        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1394        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1395        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1396        FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1397        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1398        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1399        TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1400        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1401        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1402        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1403        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1404        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1405        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1406        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1407        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1408
1409
1410    log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1411
1412    myConverter = ucnv_open("ksc", &err);
1413    if (U_FAILURE(err)) {
1414      log_data_err("Failed to create an ibm-ksc converter\n");
1415      return;
1416    }
1417    else
1418    {
1419        if (ucnv_getType(myConverter)!=UCNV_MBCS)
1420            log_err("ucnv_getType Failed for ibm-949\n");
1421        else
1422            log_verbose("ucnv_getType ibm-949 ok\n");
1423
1424        if(myConverter!=NULL)
1425            ucnv_getStarters(myConverter, mystarters, &err);
1426
1427        /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1428          log_err("Failed ucnv_getStarters for ksc\n");
1429          else
1430          log_verbose("ucnv_getStarters ok\n");*/
1431
1432    }
1433    ucnv_close(myConverter);
1434
1435    TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1436    TestConverterType("ibm-878", UCNV_SBCS);
1437#endif
1438
1439    TestConverterType("iso-8859-1", UCNV_LATIN_1);
1440
1441    TestConverterType("ibm-1208", UCNV_UTF8);
1442
1443    TestConverterType("utf-8", UCNV_UTF8);
1444    TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1445    TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1446    TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1447    TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1448
1449#if !UCONFIG_NO_LEGACY_CONVERSION
1450
1451#if defined(U_ENABLE_GENERIC_ISO_2022)
1452    TestConverterType("iso-2022", UCNV_ISO_2022);
1453#endif
1454
1455    TestConverterType("hz", UCNV_HZ);
1456#endif
1457
1458    TestConverterType("scsu", UCNV_SCSU);
1459
1460#if !UCONFIG_NO_LEGACY_CONVERSION
1461    TestConverterType("x-iscii-de", UCNV_ISCII);
1462#endif
1463
1464    TestConverterType("ascii", UCNV_US_ASCII);
1465    TestConverterType("utf-7", UCNV_UTF7);
1466    TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1467    TestConverterType("bocu-1", UCNV_BOCU1);
1468}
1469
1470static void
1471TestAmbiguousConverter(UConverter *cnv) {
1472    static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1473    UChar outUnicode[20]={ 0, 0, 0, 0 };
1474
1475    const char *s;
1476    UChar *u;
1477    UErrorCode errorCode;
1478    UBool isAmbiguous;
1479
1480    /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1481    errorCode=U_ZERO_ERROR;
1482    s=inBytes;
1483    u=outUnicode;
1484    ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1485    if(U_FAILURE(errorCode)) {
1486        /* we do not care about general failures in this test; the input may just not be mappable */
1487        return;
1488    }
1489
1490    if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1491        /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1492        /* There are some encodings that are partially ASCII based,
1493        like the ISO-7 and GSM series of codepages, which we ignore. */
1494        return;
1495    }
1496
1497    isAmbiguous=ucnv_isAmbiguous(cnv);
1498
1499    /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1500    if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1501        log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1502            ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1503        return;
1504    }
1505
1506    if(outUnicode[2]!=0x5c) {
1507        /* needs fixup, fix it */
1508        ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1509        if(outUnicode[2]!=0x5c) {
1510            /* the fix failed */
1511            log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1512            return;
1513        }
1514    }
1515}
1516
1517static void TestAmbiguous()
1518{
1519    UErrorCode status = U_ZERO_ERROR;
1520    UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1521    static const char target[] = {
1522        /* "\\usr\\local\\share\\data\\icutest.txt" */
1523        0x5c, 0x75, 0x73, 0x72,
1524        0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1525        0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1526        0x5c, 0x64, 0x61, 0x74, 0x61,
1527        0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1528        0
1529    };
1530    UChar asciiResult[200], sjisResult[200];
1531    int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1532    const char *name;
1533
1534    /* enumerate all converters */
1535    status=U_ZERO_ERROR;
1536    for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1537        cnv=ucnv_open(name, &status);
1538        if(U_SUCCESS(status)) {
1539            /* BEGIN android-changed
1540               To save space, Android does not build full ISO-2022-CN CJK tables. */
1541            const char* cnvName = ucnv_getName(cnv, &status);
1542            if (strlen(cnvName) < 8 ||
1543                strncmp(cnvName, "ISO_2022_CN", 8) != 0) {
1544                TestAmbiguousConverter(cnv);
1545            }
1546            /* END android-changed */
1547            ucnv_close(cnv);
1548        } else {
1549            log_err("error: unable to open available converter \"%s\"\n", name);
1550            status=U_ZERO_ERROR;
1551        }
1552    }
1553
1554#if !UCONFIG_NO_LEGACY_CONVERSION
1555    sjis_cnv = ucnv_open("ibm-943", &status);
1556    if (U_FAILURE(status))
1557    {
1558        log_data_err("Failed to create a SJIS converter\n");
1559        return;
1560    }
1561    ascii_cnv = ucnv_open("LATIN-1", &status);
1562    if (U_FAILURE(status))
1563    {
1564        log_data_err("Failed to create a LATIN-1 converter\n");
1565        ucnv_close(sjis_cnv);
1566        return;
1567    }
1568    /* convert target from SJIS to Unicode */
1569    sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1570    if (U_FAILURE(status))
1571    {
1572        log_err("Failed to convert the SJIS string.\n");
1573        ucnv_close(sjis_cnv);
1574        ucnv_close(ascii_cnv);
1575        return;
1576    }
1577    /* convert target from Latin-1 to Unicode */
1578    /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1579    if (U_FAILURE(status))
1580    {
1581        log_err("Failed to convert the Latin-1 string.\n");
1582        ucnv_close(sjis_cnv);
1583        ucnv_close(ascii_cnv);
1584        return;
1585    }
1586    if (!ucnv_isAmbiguous(sjis_cnv))
1587    {
1588        log_err("SJIS converter should contain ambiguous character mappings.\n");
1589        ucnv_close(sjis_cnv);
1590        ucnv_close(ascii_cnv);
1591        return;
1592    }
1593    if (u_strcmp(sjisResult, asciiResult) == 0)
1594    {
1595        log_err("File separators for SJIS don't need to be fixed.\n");
1596    }
1597    ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1598    if (u_strcmp(sjisResult, asciiResult) != 0)
1599    {
1600        log_err("Fixing file separator for SJIS failed.\n");
1601    }
1602    ucnv_close(sjis_cnv);
1603    ucnv_close(ascii_cnv);
1604#endif
1605}
1606
1607static void
1608TestSignatureDetection(){
1609    /* with null terminated strings */
1610    {
1611        static const char* data[] = {
1612                "\xFE\xFF\x00\x00",     /* UTF-16BE */
1613                "\xFF\xFE\x00\x00",     /* UTF-16LE */
1614                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1615                "\x0E\xFE\xFF\x00",     /* SCSU     */
1616
1617                "\xFE\xFF",             /* UTF-16BE */
1618                "\xFF\xFE",             /* UTF-16LE */
1619                "\xEF\xBB\xBF",         /* UTF-8    */
1620                "\x0E\xFE\xFF",         /* SCSU     */
1621
1622                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1623                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1624                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1625                "\x0E\xFE\xFF\x41",     /* SCSU     */
1626
1627                "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1628                "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1629                "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1630                "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1631                "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1632
1633                "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1634        };
1635        static const char* expected[] = {
1636                "UTF-16BE",
1637                "UTF-16LE",
1638                "UTF-8",
1639                "SCSU",
1640
1641                "UTF-16BE",
1642                "UTF-16LE",
1643                "UTF-8",
1644                "SCSU",
1645
1646                "UTF-16BE",
1647                "UTF-16LE",
1648                "UTF-8",
1649                "SCSU",
1650
1651                "UTF-7",
1652                "UTF-7",
1653                "UTF-7",
1654                "UTF-7",
1655                "UTF-7",
1656                "UTF-EBCDIC"
1657        };
1658        static const int32_t expectedLength[] ={
1659            2,
1660            2,
1661            3,
1662            3,
1663
1664            2,
1665            2,
1666            3,
1667            3,
1668
1669            2,
1670            2,
1671            3,
1672            3,
1673
1674            5,
1675            4,
1676            4,
1677            4,
1678            4,
1679            4
1680        };
1681        int i=0;
1682        UErrorCode err;
1683        int32_t signatureLength = -1;
1684        const char* source = NULL;
1685        const char* enc = NULL;
1686        for( ; i<sizeof(data)/sizeof(char*); i++){
1687            err = U_ZERO_ERROR;
1688            source = data[i];
1689            enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1690            if(U_FAILURE(err)){
1691                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1692                continue;
1693            }
1694            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1695                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1696                continue;
1697            }
1698            if(signatureLength != expectedLength[i]){
1699                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1700            }
1701        }
1702    }
1703    {
1704        static const char* data[] = {
1705                "\xFE\xFF\x00",         /* UTF-16BE */
1706                "\xFF\xFE\x00",         /* UTF-16LE */
1707                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1708                "\x0E\xFE\xFF\x00",     /* SCSU     */
1709                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1710                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1711                "\xFE\xFF",             /* UTF-16BE */
1712                "\xFF\xFE",             /* UTF-16LE */
1713                "\xEF\xBB\xBF",         /* UTF-8    */
1714                "\x0E\xFE\xFF",         /* SCSU     */
1715                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1716                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1717                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1718                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1719                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1720                "\x0E\xFE\xFF\x41",     /* SCSU     */
1721                "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1722                "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1723                "\xFB\xEE\x28",         /* BOCU-1   */
1724                "\xFF\x41\x42"          /* NULL     */
1725        };
1726        static const int len[] = {
1727            3,
1728            3,
1729            4,
1730            4,
1731            4,
1732            4,
1733            2,
1734            2,
1735            3,
1736            3,
1737            4,
1738            4,
1739            4,
1740            4,
1741            4,
1742            4,
1743            5,
1744            5,
1745            3,
1746            3
1747        };
1748
1749        static const char* expected[] = {
1750                "UTF-16BE",
1751                "UTF-16LE",
1752                "UTF-8",
1753                "SCSU",
1754                "UTF-32BE",
1755                "UTF-32LE",
1756                "UTF-16BE",
1757                "UTF-16LE",
1758                "UTF-8",
1759                "SCSU",
1760                "UTF-32BE",
1761                "UTF-32LE",
1762                "UTF-16BE",
1763                "UTF-16LE",
1764                "UTF-8",
1765                "SCSU",
1766                "UTF-32BE",
1767                "UTF-32LE",
1768                "BOCU-1",
1769                NULL
1770        };
1771        static const int32_t expectedLength[] ={
1772            2,
1773            2,
1774            3,
1775            3,
1776            4,
1777            4,
1778            2,
1779            2,
1780            3,
1781            3,
1782            4,
1783            4,
1784            2,
1785            2,
1786            3,
1787            3,
1788            4,
1789            4,
1790            3,
1791            0
1792        };
1793        int i=0;
1794        UErrorCode err;
1795        int32_t signatureLength = -1;
1796        int32_t sourceLength=-1;
1797        const char* source = NULL;
1798        const char* enc = NULL;
1799        for( ; i<sizeof(data)/sizeof(char*); i++){
1800            err = U_ZERO_ERROR;
1801            source = data[i];
1802            sourceLength = len[i];
1803            enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1804            if(U_FAILURE(err)){
1805                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1806                continue;
1807            }
1808            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1809                if(expected[i] !=NULL){
1810                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1811                 continue;
1812                }
1813            }
1814            if(signatureLength != expectedLength[i]){
1815                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1816            }
1817        }
1818    }
1819}
1820
1821static void TestUTF7() {
1822    /* test input */
1823    static const uint8_t in[]={
1824        /* H - +Jjo- - ! +- +2AHcAQ */
1825        0x48,
1826        0x2d,
1827        0x2b, 0x4a, 0x6a, 0x6f,
1828        0x2d, 0x2d,
1829        0x21,
1830        0x2b, 0x2d,
1831        0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1832    };
1833
1834    /* expected test results */
1835    static const int32_t results[]={
1836        /* number of bytes read, code point */
1837        1, 0x48,
1838        1, 0x2d,
1839        4, 0x263a, /* <WHITE SMILING FACE> */
1840        2, 0x2d,
1841        1, 0x21,
1842        2, 0x2b,
1843        7, 0x10401
1844    };
1845
1846    const char *cnvName;
1847    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1848    UErrorCode errorCode=U_ZERO_ERROR;
1849    UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1850    if(U_FAILURE(errorCode)) {
1851        log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1852        return;
1853    }
1854    TestNextUChar(cnv, source, limit, results, "UTF-7");
1855    /* Test the condition when source >= sourceLimit */
1856    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1857    cnvName = ucnv_getName(cnv, &errorCode);
1858    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1859        log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1860    }
1861    ucnv_close(cnv);
1862}
1863
1864static void TestIMAP() {
1865    /* test input */
1866    static const uint8_t in[]={
1867        /* H - &Jjo- - ! &- &2AHcAQ- \ */
1868        0x48,
1869        0x2d,
1870        0x26, 0x4a, 0x6a, 0x6f,
1871        0x2d, 0x2d,
1872        0x21,
1873        0x26, 0x2d,
1874        0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1875    };
1876
1877    /* expected test results */
1878    static const int32_t results[]={
1879        /* number of bytes read, code point */
1880        1, 0x48,
1881        1, 0x2d,
1882        4, 0x263a, /* <WHITE SMILING FACE> */
1883        2, 0x2d,
1884        1, 0x21,
1885        2, 0x26,
1886        7, 0x10401
1887    };
1888
1889    const char *cnvName;
1890    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1891    UErrorCode errorCode=U_ZERO_ERROR;
1892    UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1893    if(U_FAILURE(errorCode)) {
1894        log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1895        return;
1896    }
1897    TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1898    /* Test the condition when source >= sourceLimit */
1899    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1900    cnvName = ucnv_getName(cnv, &errorCode);
1901    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1902        log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1903    }
1904    ucnv_close(cnv);
1905}
1906
1907static void TestUTF8() {
1908    /* test input */
1909    static const uint8_t in[]={
1910        0x61,
1911        0xc2, 0x80,
1912        0xe0, 0xa0, 0x80,
1913        0xf0, 0x90, 0x80, 0x80,
1914        0xf4, 0x84, 0x8c, 0xa1,
1915        0xf0, 0x90, 0x90, 0x81
1916    };
1917
1918    /* expected test results */
1919    static const int32_t results[]={
1920        /* number of bytes read, code point */
1921        1, 0x61,
1922        2, 0x80,
1923        3, 0x800,
1924        4, 0x10000,
1925        4, 0x104321,
1926        4, 0x10401
1927    };
1928
1929    /* error test input */
1930    static const uint8_t in2[]={
1931        0x61,
1932        0xc0, 0x80,                     /* illegal non-shortest form */
1933        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1934        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1935        0xc0, 0xc0,                     /* illegal trail byte */
1936        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1937        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1938        0xfe,                           /* illegal byte altogether */
1939        0x62
1940    };
1941
1942    /* expected error test results */
1943    static const int32_t results2[]={
1944        /* number of bytes read, code point */
1945        1, 0x61,
1946        22, 0x62
1947    };
1948
1949    UConverterToUCallback cb;
1950    const void *p;
1951
1952    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1953    UErrorCode errorCode=U_ZERO_ERROR;
1954    UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1955    if(U_FAILURE(errorCode)) {
1956        log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1957        return;
1958    }
1959    TestNextUChar(cnv, source, limit, results, "UTF-8");
1960    /* Test the condition when source >= sourceLimit */
1961    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1962
1963    /* test error behavior with a skip callback */
1964    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1965    source=(const char *)in2;
1966    limit=(const char *)(in2+sizeof(in2));
1967    TestNextUChar(cnv, source, limit, results2, "UTF-8");
1968
1969    ucnv_close(cnv);
1970}
1971
1972static void TestCESU8() {
1973    /* test input */
1974    static const uint8_t in[]={
1975        0x61,
1976        0xc2, 0x80,
1977        0xe0, 0xa0, 0x80,
1978        0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1979        0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1980        0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1981        0xef, 0xbf, 0xbc
1982    };
1983
1984    /* expected test results */
1985    static const int32_t results[]={
1986        /* number of bytes read, code point */
1987        1, 0x61,
1988        2, 0x80,
1989        3, 0x800,
1990        6, 0x10000,
1991        3, 0xdc01,
1992        -1,0xd802,  /* may read 3 or 6 bytes */
1993        -1,0x10ffff,/* may read 0 or 3 bytes */
1994        3, 0xfffc
1995    };
1996
1997    /* error test input */
1998    static const uint8_t in2[]={
1999        0x61,
2000        0xc0, 0x80,                     /* illegal non-shortest form */
2001        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
2002        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
2003        0xc0, 0xc0,                     /* illegal trail byte */
2004        0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
2005        0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
2006        0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
2007        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
2008        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
2009        0xfe,                           /* illegal byte altogether */
2010        0x62
2011    };
2012
2013    /* expected error test results */
2014    static const int32_t results2[]={
2015        /* number of bytes read, code point */
2016        1, 0x61,
2017        34, 0x62
2018    };
2019
2020    UConverterToUCallback cb;
2021    const void *p;
2022
2023    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2024    UErrorCode errorCode=U_ZERO_ERROR;
2025    UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2026    if(U_FAILURE(errorCode)) {
2027        log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2028        return;
2029    }
2030    TestNextUChar(cnv, source, limit, results, "CESU-8");
2031    /* Test the condition when source >= sourceLimit */
2032    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2033
2034    /* test error behavior with a skip callback */
2035    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2036    source=(const char *)in2;
2037    limit=(const char *)(in2+sizeof(in2));
2038    TestNextUChar(cnv, source, limit, results2, "CESU-8");
2039
2040    ucnv_close(cnv);
2041}
2042
2043static void TestUTF16() {
2044    /* test input */
2045    static const uint8_t in1[]={
2046        0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2047    };
2048    static const uint8_t in2[]={
2049        0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2050    };
2051    static const uint8_t in3[]={
2052        0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2053    };
2054
2055    /* expected test results */
2056    static const int32_t results1[]={
2057        /* number of bytes read, code point */
2058        4, 0x4e00,
2059        2, 0xfeff
2060    };
2061    static const int32_t results2[]={
2062        /* number of bytes read, code point */
2063        4, 0x004e,
2064        2, 0xfffe
2065    };
2066    static const int32_t results3[]={
2067        /* number of bytes read, code point */
2068        2, 0xfefe,
2069        2, 0x4e00,
2070        2, 0xfeff,
2071        4, 0x20001
2072    };
2073
2074    const char *source, *limit;
2075
2076    UErrorCode errorCode=U_ZERO_ERROR;
2077    UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2078    if(U_FAILURE(errorCode)) {
2079        log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2080        return;
2081    }
2082
2083    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2084    TestNextUChar(cnv, source, limit, results1, "UTF-16");
2085
2086    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2087    ucnv_resetToUnicode(cnv);
2088    TestNextUChar(cnv, source, limit, results2, "UTF-16");
2089
2090    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2091    ucnv_resetToUnicode(cnv);
2092    TestNextUChar(cnv, source, limit, results3, "UTF-16");
2093
2094    /* Test the condition when source >= sourceLimit */
2095    ucnv_resetToUnicode(cnv);
2096    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2097
2098    ucnv_close(cnv);
2099}
2100
2101static void TestUTF16BE() {
2102    /* test input */
2103    static const uint8_t in[]={
2104        0x00, 0x61,
2105        0x00, 0xc0,
2106        0x00, 0x31,
2107        0x00, 0xf4,
2108        0xce, 0xfe,
2109        0xd8, 0x01, 0xdc, 0x01
2110    };
2111
2112    /* expected test results */
2113    static const int32_t results[]={
2114        /* number of bytes read, code point */
2115        2, 0x61,
2116        2, 0xc0,
2117        2, 0x31,
2118        2, 0xf4,
2119        2, 0xcefe,
2120        4, 0x10401
2121    };
2122
2123    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2124    UErrorCode errorCode=U_ZERO_ERROR;
2125    UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2126    if(U_FAILURE(errorCode)) {
2127        log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2128        return;
2129    }
2130    TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2131    /* Test the condition when source >= sourceLimit */
2132    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2133    /*Test for the condition where there is an invalid character*/
2134    {
2135        static const uint8_t source2[]={0x61};
2136        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2137        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2138    }
2139#if 0
2140    /*
2141     * Test disabled because currently the UTF-16BE/LE converters are supposed
2142     * to not set errors for unpaired surrogates.
2143     * This may change with
2144     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2145     */
2146
2147    /*Test for the condition where there is a surrogate pair*/
2148    {
2149        const uint8_t source2[]={0xd8, 0x01};
2150        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2151    }
2152#endif
2153    ucnv_close(cnv);
2154}
2155
2156static void
2157TestUTF16LE() {
2158    /* test input */
2159    static const uint8_t in[]={
2160        0x61, 0x00,
2161        0x31, 0x00,
2162        0x4e, 0x2e,
2163        0x4e, 0x00,
2164        0x01, 0xd8, 0x01, 0xdc
2165    };
2166
2167    /* expected test results */
2168    static const int32_t results[]={
2169        /* number of bytes read, code point */
2170        2, 0x61,
2171        2, 0x31,
2172        2, 0x2e4e,
2173        2, 0x4e,
2174        4, 0x10401
2175    };
2176
2177    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2178    UErrorCode errorCode=U_ZERO_ERROR;
2179    UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2180    if(U_FAILURE(errorCode)) {
2181        log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2182        return;
2183    }
2184    TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2185    /* Test the condition when source >= sourceLimit */
2186    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2187    /*Test for the condition where there is an invalid character*/
2188    {
2189        static const uint8_t source2[]={0x61};
2190        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2191        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2192    }
2193#if 0
2194    /*
2195     * Test disabled because currently the UTF-16BE/LE converters are supposed
2196     * to not set errors for unpaired surrogates.
2197     * This may change with
2198     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2199     */
2200
2201    /*Test for the condition where there is a surrogate character*/
2202    {
2203        static const uint8_t source2[]={0x01, 0xd8};
2204        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2205    }
2206#endif
2207
2208    ucnv_close(cnv);
2209}
2210
2211static void TestUTF32() {
2212    /* test input */
2213    static const uint8_t in1[]={
2214        0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2215    };
2216    static const uint8_t in2[]={
2217        0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2218    };
2219    static const uint8_t in3[]={
2220        0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2221    };
2222
2223    /* expected test results */
2224    static const int32_t results1[]={
2225        /* number of bytes read, code point */
2226        8, 0x100f00,
2227        4, 0xfeff
2228    };
2229    static const int32_t results2[]={
2230        /* number of bytes read, code point */
2231        8, 0x0f1000,
2232        4, 0xfffe
2233    };
2234    static const int32_t results3[]={
2235        /* number of bytes read, code point */
2236        4, 0xfefe,
2237        4, 0x100f00,
2238        4, 0xfffd, /* unmatched surrogate */
2239        4, 0xfffd  /* unmatched surrogate */
2240    };
2241
2242    const char *source, *limit;
2243
2244    UErrorCode errorCode=U_ZERO_ERROR;
2245    UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2246    if(U_FAILURE(errorCode)) {
2247        log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2248        return;
2249    }
2250
2251    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2252    TestNextUChar(cnv, source, limit, results1, "UTF-32");
2253
2254    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2255    ucnv_resetToUnicode(cnv);
2256    TestNextUChar(cnv, source, limit, results2, "UTF-32");
2257
2258    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2259    ucnv_resetToUnicode(cnv);
2260    TestNextUChar(cnv, source, limit, results3, "UTF-32");
2261
2262    /* Test the condition when source >= sourceLimit */
2263    ucnv_resetToUnicode(cnv);
2264    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2265
2266    ucnv_close(cnv);
2267}
2268
2269static void
2270TestUTF32BE() {
2271    /* test input */
2272    static const uint8_t in[]={
2273        0x00, 0x00, 0x00, 0x61,
2274        0x00, 0x00, 0x30, 0x61,
2275        0x00, 0x00, 0xdc, 0x00,
2276        0x00, 0x00, 0xd8, 0x00,
2277        0x00, 0x00, 0xdf, 0xff,
2278        0x00, 0x00, 0xff, 0xfe,
2279        0x00, 0x10, 0xab, 0xcd,
2280        0x00, 0x10, 0xff, 0xff
2281    };
2282
2283    /* expected test results */
2284    static const int32_t results[]={
2285        /* number of bytes read, code point */
2286        4, 0x61,
2287        4, 0x3061,
2288        4, 0xfffd,
2289        4, 0xfffd,
2290        4, 0xfffd,
2291        4, 0xfffe,
2292        4, 0x10abcd,
2293        4, 0x10ffff
2294    };
2295
2296    /* error test input */
2297    static const uint8_t in2[]={
2298        0x00, 0x00, 0x00, 0x61,
2299        0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2300        0x00, 0x00, 0x00, 0x62,
2301        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2302        0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2303        0x00, 0x00, 0x01, 0x62,
2304        0x00, 0x00, 0x02, 0x62
2305    };
2306
2307    /* expected error test results */
2308    static const int32_t results2[]={
2309        /* number of bytes read, code point */
2310        4,  0x61,
2311        8,  0x62,
2312        12, 0x162,
2313        4,  0x262
2314    };
2315
2316    UConverterToUCallback cb;
2317    const void *p;
2318
2319    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2320    UErrorCode errorCode=U_ZERO_ERROR;
2321    UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2322    if(U_FAILURE(errorCode)) {
2323        log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2324        return;
2325    }
2326    TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2327
2328    /* Test the condition when source >= sourceLimit */
2329    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2330
2331    /* test error behavior with a skip callback */
2332    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2333    source=(const char *)in2;
2334    limit=(const char *)(in2+sizeof(in2));
2335    TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2336
2337    ucnv_close(cnv);
2338}
2339
2340static void
2341TestUTF32LE() {
2342    /* test input */
2343    static const uint8_t in[]={
2344        0x61, 0x00, 0x00, 0x00,
2345        0x61, 0x30, 0x00, 0x00,
2346        0x00, 0xdc, 0x00, 0x00,
2347        0x00, 0xd8, 0x00, 0x00,
2348        0xff, 0xdf, 0x00, 0x00,
2349        0xfe, 0xff, 0x00, 0x00,
2350        0xcd, 0xab, 0x10, 0x00,
2351        0xff, 0xff, 0x10, 0x00
2352    };
2353
2354    /* expected test results */
2355    static const int32_t results[]={
2356        /* number of bytes read, code point */
2357        4, 0x61,
2358        4, 0x3061,
2359        4, 0xfffd,
2360        4, 0xfffd,
2361        4, 0xfffd,
2362        4, 0xfffe,
2363        4, 0x10abcd,
2364        4, 0x10ffff
2365    };
2366
2367    /* error test input */
2368    static const uint8_t in2[]={
2369        0x61, 0x00, 0x00, 0x00,
2370        0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2371        0x62, 0x00, 0x00, 0x00,
2372        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2373        0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2374        0x62, 0x01, 0x00, 0x00,
2375        0x62, 0x02, 0x00, 0x00,
2376    };
2377
2378    /* expected error test results */
2379    static const int32_t results2[]={
2380        /* number of bytes read, code point */
2381        4,  0x61,
2382        8,  0x62,
2383        12, 0x162,
2384        4,  0x262,
2385    };
2386
2387    UConverterToUCallback cb;
2388    const void *p;
2389
2390    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2391    UErrorCode errorCode=U_ZERO_ERROR;
2392    UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2393    if(U_FAILURE(errorCode)) {
2394        log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2395        return;
2396    }
2397    TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2398
2399    /* Test the condition when source >= sourceLimit */
2400    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2401
2402    /* test error behavior with a skip callback */
2403    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2404    source=(const char *)in2;
2405    limit=(const char *)(in2+sizeof(in2));
2406    TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2407
2408    ucnv_close(cnv);
2409}
2410
2411static void
2412TestLATIN1() {
2413    /* test input */
2414    static const uint8_t in[]={
2415       0x61,
2416       0x31,
2417       0x32,
2418       0xc0,
2419       0xf0,
2420       0xf4,
2421    };
2422
2423    /* expected test results */
2424    static const int32_t results[]={
2425        /* number of bytes read, code point */
2426        1, 0x61,
2427        1, 0x31,
2428        1, 0x32,
2429        1, 0xc0,
2430        1, 0xf0,
2431        1, 0xf4,
2432    };
2433    static const uint16_t in1[] = {
2434        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2435        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2436        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2437        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2438        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2439        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2440        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2441        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2442        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2443        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2444        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2445        0xcb, 0x82
2446    };
2447    static const uint8_t out1[] = {
2448        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2449        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2450        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2451        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2452        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2453        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2454        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2455        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2456        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2457        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2458        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2459        0xcb, 0x82
2460    };
2461    static const uint16_t in2[]={
2462        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2463        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2464        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2465        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2466        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2467        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2468        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2469        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2470        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2471        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2472        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2473        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2474        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2475        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2476        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2477        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2478        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2479        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2480        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2481        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2482        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2483        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2484        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2485        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2486        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2487        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2488        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2489        0x37, 0x20, 0x2A, 0x2F,
2490    };
2491    static const unsigned char out2[]={
2492        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2493        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2494        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2495        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2496        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2497        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2498        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2499        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2500        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2501        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2502        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2503        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2504        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2505        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2506        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2507        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2508        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2509        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2510        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2511        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2512        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2513        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2514        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2515        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2516        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2517        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2518        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2519        0x37, 0x20, 0x2A, 0x2F,
2520    };
2521    const char *source=(const char *)in;
2522    const char *limit=(const char *)in+sizeof(in);
2523
2524    UErrorCode errorCode=U_ZERO_ERROR;
2525    UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2526    if(U_FAILURE(errorCode)) {
2527        log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2528        return;
2529    }
2530    TestNextUChar(cnv, source, limit, results, "LATIN_1");
2531    /* Test the condition when source >= sourceLimit */
2532    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2533    TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2534    TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2535
2536    ucnv_close(cnv);
2537}
2538
2539static void
2540TestSBCS() {
2541    /* test input */
2542    static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2543    /* expected test results */
2544    static const int32_t results[]={
2545        /* number of bytes read, code point */
2546        1, 0x61,
2547        1, 0xbf,
2548        1, 0xc4,
2549        1, 0x2021,
2550        1, 0xf8ff,
2551        1, 0x00d9
2552    };
2553
2554    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2555    UErrorCode errorCode=U_ZERO_ERROR;
2556    UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2557    if(U_FAILURE(errorCode)) {
2558        log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2559        return;
2560    }
2561    TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2562    /* Test the condition when source >= sourceLimit */
2563    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2564    /*Test for Illegal character */ /*
2565    {
2566    static const uint8_t input1[]={ 0xA1 };
2567    const char* illegalsource=(const char*)input1;
2568    TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2569    }
2570   */
2571    ucnv_close(cnv);
2572}
2573
2574static void
2575TestDBCS() {
2576    /* test input */
2577    static const uint8_t in[]={
2578        0x44, 0x6a,
2579        0xc4, 0x9c,
2580        0x7a, 0x74,
2581        0x46, 0xab,
2582        0x42, 0x5b,
2583
2584    };
2585
2586    /* expected test results */
2587    static const int32_t results[]={
2588        /* number of bytes read, code point */
2589        2, 0x00a7,
2590        2, 0xe1d2,
2591        2, 0x6962,
2592        2, 0xf842,
2593        2, 0xffe5,
2594    };
2595
2596    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2597    UErrorCode errorCode=U_ZERO_ERROR;
2598
2599    UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2600    if(U_FAILURE(errorCode)) {
2601        log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2602        return;
2603    }
2604    TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2605    /* Test the condition when source >= sourceLimit */
2606    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2607    /*Test for the condition where there is an invalid character*/
2608    {
2609        static const uint8_t source2[]={0x1a, 0x1b};
2610        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2611    }
2612    /*Test for the condition where we have a truncated char*/
2613    {
2614        static const uint8_t source1[]={0xc4};
2615        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2616        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2617    }
2618    ucnv_close(cnv);
2619}
2620
2621static void
2622TestMBCS() {
2623    /* test input */
2624    static const uint8_t in[]={
2625        0x01,
2626        0xa6, 0xa3,
2627        0x00,
2628        0xa6, 0xa1,
2629        0x08,
2630        0xc2, 0x76,
2631        0xc2, 0x78,
2632
2633    };
2634
2635    /* expected test results */
2636    static const int32_t results[]={
2637        /* number of bytes read, code point */
2638        1, 0x0001,
2639        2, 0x250c,
2640        1, 0x0000,
2641        2, 0x2500,
2642        1, 0x0008,
2643        2, 0xd60c,
2644        2, 0xd60e,
2645    };
2646
2647    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2648    UErrorCode errorCode=U_ZERO_ERROR;
2649
2650    UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2651    if(U_FAILURE(errorCode)) {
2652        log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2653        return;
2654    }
2655    TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2656    /* Test the condition when source >= sourceLimit */
2657    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2658    /*Test for the condition where there is an invalid character*/
2659    {
2660        static const uint8_t source2[]={0xa1, 0x80};
2661        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2662    }
2663    /*Test for the condition where we have a truncated char*/
2664    {
2665        static const uint8_t source1[]={0xc4};
2666        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2667        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2668    }
2669    ucnv_close(cnv);
2670
2671}
2672
2673#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2674static void
2675TestICCRunout() {
2676/*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2677
2678    const char *cnvName = "ibm-1363";
2679    UErrorCode status = U_ZERO_ERROR;
2680    const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2681    /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2682    const char *source = sourceData;
2683    const char *sourceLim = sourceData+sizeof(sourceData);
2684    UChar c1, c2, c3;
2685    UConverter *cnv=ucnv_open(cnvName, &status);
2686    if(U_FAILURE(status)) {
2687        log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2688	return;
2689    }
2690
2691#if 0
2692    {
2693    UChar   targetBuf[256];
2694    UChar   *target = targetBuf;
2695    UChar   *targetLim = target+256;
2696    ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2697
2698    log_info("After convert: target@%d, source@%d, status%s\n",
2699	     target-targetBuf, source-sourceData, u_errorName(status));
2700
2701    if(U_FAILURE(status)) {
2702	log_err("Failed to convert: %s\n", u_errorName(status));
2703    } else {
2704
2705    }
2706    }
2707#endif
2708
2709    c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2710    log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2711
2712    c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2713    log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2714
2715    c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2716    log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2717
2718    if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2719	log_verbose("OK\n");
2720    } else {
2721	log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2722    }
2723
2724    ucnv_close(cnv);
2725
2726}
2727#endif
2728
2729#ifdef U_ENABLE_GENERIC_ISO_2022
2730
2731static void
2732TestISO_2022() {
2733    /* test input */
2734    static const uint8_t in[]={
2735        0x1b, 0x25, 0x42,
2736        0x31,
2737        0x32,
2738        0x61,
2739        0xc2, 0x80,
2740        0xe0, 0xa0, 0x80,
2741        0xf0, 0x90, 0x80, 0x80
2742    };
2743
2744
2745
2746    /* expected test results */
2747    static const int32_t results[]={
2748        /* number of bytes read, code point */
2749        4, 0x0031,  /* 4 bytes including the escape sequence */
2750        1, 0x0032,
2751        1, 0x61,
2752        2, 0x80,
2753        3, 0x800,
2754        4, 0x10000
2755    };
2756
2757    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2758    UErrorCode errorCode=U_ZERO_ERROR;
2759    UConverter *cnv;
2760
2761    cnv=ucnv_open("ISO_2022", &errorCode);
2762    if(U_FAILURE(errorCode)) {
2763        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2764        return;
2765    }
2766    TestNextUChar(cnv, source, limit, results, "ISO_2022");
2767
2768    /* Test the condition when source >= sourceLimit */
2769    TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2770    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2771    /*Test for the condition where we have a truncated char*/
2772    {
2773        static const uint8_t source1[]={0xc4};
2774        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2775        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2776    }
2777    /*Test for the condition where there is an invalid character*/
2778    {
2779        static const uint8_t source2[]={0xa1, 0x01};
2780        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2781    }
2782    ucnv_close(cnv);
2783}
2784
2785#endif
2786
2787static void
2788TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2789    const UChar* uSource;
2790    const UChar* uSourceLimit;
2791    const char* cSource;
2792    const char* cSourceLimit;
2793    UChar *uTargetLimit =NULL;
2794    UChar *uTarget;
2795    char *cTarget;
2796    const char *cTargetLimit;
2797    char *cBuf;
2798    UChar *uBuf; /*,*test;*/
2799    int32_t uBufSize = 120;
2800    int len=0;
2801    int i=2;
2802    UErrorCode errorCode=U_ZERO_ERROR;
2803    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2804    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2805    ucnv_reset(cnv);
2806    for(;--i>0; ){
2807        uSource = (UChar*) source;
2808        uSourceLimit=(const UChar*)sourceLimit;
2809        cTarget = cBuf;
2810        uTarget = uBuf;
2811        cSource = cBuf;
2812        cTargetLimit = cBuf;
2813        uTargetLimit = uBuf;
2814
2815        do{
2816
2817            cTargetLimit = cTargetLimit+ i;
2818            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2819            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2820               errorCode=U_ZERO_ERROR;
2821                continue;
2822            }
2823
2824            if(U_FAILURE(errorCode)){
2825                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2826                return;
2827            }
2828
2829        }while (uSource<uSourceLimit);
2830
2831        cSourceLimit =cTarget;
2832        do{
2833            uTargetLimit=uTargetLimit+i;
2834            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2835            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2836               errorCode=U_ZERO_ERROR;
2837                continue;
2838            }
2839            if(U_FAILURE(errorCode)){
2840                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2841                    return;
2842            }
2843        }while(cSource<cSourceLimit);
2844
2845        uSource = source;
2846        /*test =uBuf;*/
2847        for(len=0;len<(int)(source - sourceLimit);len++){
2848            if(uBuf[len]!=uSource[len]){
2849                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2850            }
2851        }
2852    }
2853    free(uBuf);
2854    free(cBuf);
2855}
2856/* Test for Jitterbug 778 */
2857static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2858    const UChar* uSource;
2859    const UChar* uSourceLimit;
2860    const char* cSource;
2861    UChar *uTargetLimit =NULL;
2862    UChar *uTarget;
2863    char *cTarget;
2864    const char *cTargetLimit;
2865    char *cBuf;
2866    UChar *uBuf,*test;
2867    int32_t uBufSize = 120;
2868    int numCharsInTarget=0;
2869    UErrorCode errorCode=U_ZERO_ERROR;
2870    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2871    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2872    uSource = source;
2873    uSourceLimit=sourceLimit;
2874    cTarget = cBuf;
2875    cTargetLimit = cBuf +uBufSize*5;
2876    uTarget = uBuf;
2877    uTargetLimit = uBuf+ uBufSize*5;
2878    ucnv_reset(cnv);
2879    numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2880    if(U_FAILURE(errorCode)){
2881        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2882        return;
2883    }
2884    cSource = cBuf;
2885    test =uBuf;
2886    ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2887    if(U_FAILURE(errorCode)){
2888        log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2889        return;
2890    }
2891    uSource = source;
2892    while(uSource<uSourceLimit){
2893        if(*test!=*uSource){
2894
2895            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2896        }
2897        uSource++;
2898        test++;
2899    }
2900    free(uBuf);
2901    free(cBuf);
2902}
2903
2904static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2905    const UChar* uSource;
2906    const UChar* uSourceLimit;
2907    const char* cSource;
2908    const char* cSourceLimit;
2909    UChar *uTargetLimit =NULL;
2910    UChar *uTarget;
2911    char *cTarget;
2912    const char *cTargetLimit;
2913    char *cBuf;
2914    UChar *uBuf; /*,*test;*/
2915    int32_t uBufSize = 120;
2916    int len=0;
2917    int i=2;
2918    const UChar *temp = sourceLimit;
2919    UErrorCode errorCode=U_ZERO_ERROR;
2920    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2921    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2922
2923    ucnv_reset(cnv);
2924    for(;--i>0;){
2925        uSource = (UChar*) source;
2926        cTarget = cBuf;
2927        uTarget = uBuf;
2928        cSource = cBuf;
2929        cTargetLimit = cBuf;
2930        uTargetLimit = uBuf+uBufSize*5;
2931        cTargetLimit = cTargetLimit+uBufSize*10;
2932        uSourceLimit=uSource;
2933        do{
2934
2935            if (uSourceLimit < sourceLimit) {
2936                uSourceLimit = uSourceLimit+1;
2937            }
2938            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2939            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2940               errorCode=U_ZERO_ERROR;
2941                continue;
2942            }
2943
2944            if(U_FAILURE(errorCode)){
2945                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2946                return;
2947            }
2948
2949        }while (uSource<temp);
2950
2951        cSourceLimit =cBuf;
2952        do{
2953            if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2954                cSourceLimit = cSourceLimit+1;
2955            }
2956            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2957            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2958               errorCode=U_ZERO_ERROR;
2959                continue;
2960            }
2961            if(U_FAILURE(errorCode)){
2962                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2963                    return;
2964            }
2965        }while(cSource<cTarget);
2966
2967        uSource = source;
2968        /*test =uBuf;*/
2969        for(;len<(int)(source - sourceLimit);len++){
2970            if(uBuf[len]!=uSource[len]){
2971                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2972            }
2973        }
2974    }
2975    free(uBuf);
2976    free(cBuf);
2977}
2978static void
2979TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2980                     const uint16_t results[], const char* message){
2981/*     const char* s0; */
2982     const char* s=(char*)source;
2983     const uint16_t *r=results;
2984     UErrorCode errorCode=U_ZERO_ERROR;
2985     uint32_t c,exC;
2986     ucnv_reset(cnv);
2987     while(s<limit) {
2988	 /* s0=s; */
2989        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2990        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2991            break; /* no more significant input */
2992        } else if(U_FAILURE(errorCode)) {
2993            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2994            break;
2995        } else {
2996            if(U16_IS_LEAD(*r)){
2997                int i =0, len = 2;
2998                U16_NEXT(r, i, len, exC);
2999                r++;
3000            }else{
3001                exC = *r;
3002            }
3003            if(c!=(uint32_t)(exC))
3004                log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
3005        }
3006        r++;
3007    }
3008}
3009
3010static int TestJitterbug930(const char* enc){
3011    UErrorCode err = U_ZERO_ERROR;
3012    UConverter*converter;
3013    char out[80];
3014    char*target = out;
3015    UChar in[4];
3016    const UChar*source = in;
3017    int32_t off[80];
3018    int32_t* offsets = off;
3019    int numOffWritten=0;
3020    UBool flush = 0;
3021    converter = my_ucnv_open(enc, &err);
3022
3023    in[0] = 0x41;     /* 0x4E00;*/
3024    in[1] = 0x4E01;
3025    in[2] = 0x4E02;
3026    in[3] = 0x4E03;
3027
3028    memset(off, '*', sizeof(off));
3029
3030    ucnv_fromUnicode (converter,
3031            &target,
3032            target+2,
3033            &source,
3034            source+3,
3035            offsets,
3036            flush,
3037            &err);
3038
3039        /* writes three bytes into the output buffer: 41 1B 24
3040        * but offsets contains 0 1 1
3041    */
3042    while(*offsets< off[10]){
3043        numOffWritten++;
3044        offsets++;
3045    }
3046    log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3047    if(numOffWritten!= (int)(target-out)){
3048        log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3049    }
3050
3051    err = U_ZERO_ERROR;
3052
3053    memset(off,'*' , sizeof(off));
3054
3055    flush = 1;
3056    offsets=off;
3057    ucnv_fromUnicode (converter,
3058            &target,
3059            target+4,
3060            &source,
3061            source,
3062            offsets,
3063            flush,
3064            &err);
3065    numOffWritten=0;
3066    while(*offsets< off[10]){
3067        numOffWritten++;
3068        if(*offsets!= -1){
3069            log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3070        }
3071        offsets++;
3072    }
3073
3074    /* writes 42 43 7A into output buffer,
3075     * offsets contains -1 -1 -1
3076     */
3077    ucnv_close(converter);
3078    return 0;
3079}
3080
3081static void
3082TestHZ() {
3083    /* test input */
3084    static const uint16_t in[]={
3085            0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3086            0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3087            0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3088            0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3089            0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3090            0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3091            0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3092            0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3093            0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3094            0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3095            0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3096            0x005A, 0x005B, 0x005C, 0x000A
3097      };
3098    const UChar* uSource;
3099    const UChar* uSourceLimit;
3100    const char* cSource;
3101    const char* cSourceLimit;
3102    UChar *uTargetLimit =NULL;
3103    UChar *uTarget;
3104    char *cTarget;
3105    const char *cTargetLimit;
3106    char *cBuf;
3107    UChar *uBuf,*test;
3108    int32_t uBufSize = 120;
3109    UErrorCode errorCode=U_ZERO_ERROR;
3110    UConverter *cnv;
3111    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3112    int32_t* myOff= offsets;
3113    cnv=ucnv_open("HZ", &errorCode);
3114    if(U_FAILURE(errorCode)) {
3115        log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3116        return;
3117    }
3118
3119    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3120    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3121    uSource = (const UChar*)in;
3122    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3123    cTarget = cBuf;
3124    cTargetLimit = cBuf +uBufSize*5;
3125    uTarget = uBuf;
3126    uTargetLimit = uBuf+ uBufSize*5;
3127    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3128    if(U_FAILURE(errorCode)){
3129        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3130        return;
3131    }
3132    cSource = cBuf;
3133    cSourceLimit =cTarget;
3134    test =uBuf;
3135    myOff=offsets;
3136    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3137    if(U_FAILURE(errorCode)){
3138        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3139        return;
3140    }
3141    uSource = (const UChar*)in;
3142    while(uSource<uSourceLimit){
3143        if(*test!=*uSource){
3144
3145            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3146        }
3147        uSource++;
3148        test++;
3149    }
3150    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3151    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3152    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3153    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3154    TestJitterbug930("csISO2022JP");
3155    ucnv_close(cnv);
3156    free(offsets);
3157    free(uBuf);
3158    free(cBuf);
3159}
3160
/*
 * ISCII conversion test driven by two parallel tables: in[] holds the
 * UTF-16 text and byteArr[] the corresponding ISCII bytes. The tables are
 * handed to testConvertToU() (bytes as input, in[] as the expected text,
 * converter "x-iscii-de") and to TestConv() (in[] as input, byteArr[] as
 * the expected bytes, converter "ISCII,version=0").
 * The section comments in the two tables mirror each other; keep both
 * tables aligned when editing either one.
 */
static void
TestISCII(){
        /* test input */
    static const uint16_t in[]={
        /* test full range of Devanagari */
        0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
        0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
        0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
        0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
        0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
        0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
        0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
        0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
        0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
        0x096D,0x096E,0x096F,
        /* test Soft halant*/
        0x0915,0x094d, 0x200D,
        /* test explicit halant */
        0x0915,0x094d, 0x200c,
        /* test double danda */
        0x965,
        /* test ASCII */
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        /* tests from Lotus */
        0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
        0x0930,0x094D,0x200D,
        0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
        0x0915,0x0921,0x002B,0x095F,
        /* tamil range */
        0x0B86, 0xB87, 0xB88,
        /* telugu range */
        0x0C05, 0x0C02, 0x0C03,0x0c31,
        /* kannada range */
        0x0C85, 0xC82, 0x0C83,
        /* test Abbr sign and Anudatta */
        0x0970, 0x952,
       /* 0x0958,
        0x0959,
        0x095A,
        0x095B,
        0x095C,
        0x095D,
        0x095E,
        0x095F,*/
        0x0960 /* Vocalic RR; see the matching byte pair in byteArr */,
        0x0944 /* Vowel Sign Vocalic RR 0xDF, 0xE9 */,
        0x090C ,
        0x0962,
        0x0961 /* Vocalic LL; see the matching byte pair in byteArr */,
        0x0963 /* Vowel Sign Vocalic LL; see the matching byte pair in byteArr */,
        0x0950 /* OM Symbol 0xa1, 0xE9,*/,
        0x093D /* Avagraha  0xEA, 0xE9*/,
        0x0958,
        0x0959,
        0x095A,
        0x095B,
        0x095C,
        0x095D,
        0x095E,
        0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
      };
    /* ISCII bytes corresponding to in[], section by section */
    static const unsigned char byteArr[]={

        0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
        0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
        0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
        0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
        0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
        0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
        0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
        0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
        0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
        0xf8,0xf9,0xfa,
        /* test soft halant */
        0xb3, 0xE8, 0xE9,
        /* test explicit halant */
        0xb3, 0xE8, 0xE8,
        /* test double danda */
        0xea, 0xea,
        /* test ASCII */
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        /* test ATR code */

        /* tests from Lotus */
        0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
        0xEF,0x42,0xCF,0xE8,0xD9,
        0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
        0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
        /* tamil range */
        0xEF, 0x44, 0xa5, 0xa6, 0xa7,
        /* telugu range */
        0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
        /* kannada range */
        0xEF, 0x48,0xa4, 0xa2, 0xa3,
        /* anudatta and abbreviation sign */
        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,


        0xAA, 0xE9,/* RI + NUKTA 0x0960*/

        0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/

        0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/

        0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/

        0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/

        0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/

        0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/

        0xEA, 0xE9, /* Danda + Nukta 0x093D*/

        0xB3, 0xE9, /* Ka + NUKTA */

        0xB4, 0xE9, /* Kha + NUKTA */

        0xB5, 0xE9, /* Ga + NUKTA */

        0xBA, 0xE9,

        0xBF, 0xE9,

        0xC0, 0xE9,

        0xC9, 0xE9,
        /* INV halant RA    */
        0xD9, 0xE8, 0xCF,
        0x00, 0x00A0,
        /* just consume unhandled codepoints */
        0xEF, 0x30,

    };
    /* bytes -> Unicode: byteArr is the input, in[] the expected output */
    testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
    /* Unicode -> bytes: in[] is the input, byteArr the expected output */
    TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));

}
3303
3304static void
3305TestISO_2022_JP() {
3306    /* test input */
3307    static const uint16_t in[]={
3308        0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3309        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3310        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3311        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3312        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3313        0x201D, 0x3014, 0x000D, 0x000A,
3314        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3315        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3316        };
3317    const UChar* uSource;
3318    const UChar* uSourceLimit;
3319    const char* cSource;
3320    const char* cSourceLimit;
3321    UChar *uTargetLimit =NULL;
3322    UChar *uTarget;
3323    char *cTarget;
3324    const char *cTargetLimit;
3325    char *cBuf;
3326    UChar *uBuf,*test;
3327    int32_t uBufSize = 120;
3328    UErrorCode errorCode=U_ZERO_ERROR;
3329    UConverter *cnv;
3330    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3331    int32_t* myOff= offsets;
3332    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3333    if(U_FAILURE(errorCode)) {
3334        log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3335        return;
3336    }
3337
3338    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3339    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3340    uSource = (const UChar*)in;
3341    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3342    cTarget = cBuf;
3343    cTargetLimit = cBuf +uBufSize*5;
3344    uTarget = uBuf;
3345    uTargetLimit = uBuf+ uBufSize*5;
3346    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3347    if(U_FAILURE(errorCode)){
3348        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3349        return;
3350    }
3351    cSource = cBuf;
3352    cSourceLimit =cTarget;
3353    test =uBuf;
3354    myOff=offsets;
3355    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3356    if(U_FAILURE(errorCode)){
3357        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3358        return;
3359    }
3360
3361    uSource = (const UChar*)in;
3362    while(uSource<uSourceLimit){
3363        if(*test!=*uSource){
3364
3365            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3366        }
3367        uSource++;
3368        test++;
3369    }
3370
3371    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3372    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3373    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3374    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3375    TestJitterbug930("csISO2022JP");
3376    ucnv_close(cnv);
3377    free(uBuf);
3378    free(cBuf);
3379    free(offsets);
3380}
3381
3382static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3383    const UChar* uSource;
3384    const UChar* uSourceLimit;
3385    const char* cSource;
3386    const char* cSourceLimit;
3387    UChar *uTargetLimit =NULL;
3388    UChar *uTarget;
3389    char *cTarget;
3390    const char *cTargetLimit;
3391    char *cBuf;
3392    UChar *uBuf,*test;
3393    int32_t uBufSize = 120*10;
3394    UErrorCode errorCode=U_ZERO_ERROR;
3395    UConverter *cnv;
3396    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3397    int32_t* myOff= offsets;
3398    cnv=my_ucnv_open(conv, &errorCode);
3399    if(U_FAILURE(errorCode)) {
3400        log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3401        return;
3402    }
3403
3404    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3405    cBuf =(char*)malloc(uBufSize * sizeof(char));
3406    uSource = (const UChar*)in;
3407    uSourceLimit=uSource+len;
3408    cTarget = cBuf;
3409    cTargetLimit = cBuf +uBufSize;
3410    uTarget = uBuf;
3411    uTargetLimit = uBuf+ uBufSize;
3412    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3413    if(U_FAILURE(errorCode)){
3414        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3415        return;
3416    }
3417    /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3418    cSource = cBuf;
3419    cSourceLimit =cTarget;
3420    test =uBuf;
3421    myOff=offsets;
3422    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3423    if(U_FAILURE(errorCode)){
3424        log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3425        return;
3426    }
3427
3428    uSource = (const UChar*)in;
3429    while(uSource<uSourceLimit){
3430        if(*test!=*uSource){
3431            log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3432        }
3433        uSource++;
3434        test++;
3435    }
3436    TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3437    TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3438    TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3439    if(byteArr && byteArrLen!=0){
3440        TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3441        TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3442        {
3443            cSource = byteArr;
3444            cSourceLimit = cSource+byteArrLen;
3445            test=uBuf;
3446            myOff = offsets;
3447            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3448            if(U_FAILURE(errorCode)){
3449                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3450                return;
3451            }
3452
3453            uSource = (const UChar*)in;
3454            while(uSource<uSourceLimit){
3455                if(*test!=*uSource){
3456                    log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3457                }
3458                uSource++;
3459                test++;
3460            }
3461        }
3462    }
3463
3464    ucnv_close(cnv);
3465    free(uBuf);
3466    free(cBuf);
3467    free(offsets);
3468}
3469static UChar U_CALLCONV
3470_charAt(int32_t offset, void *context) {
3471    return ((char*)context)[offset];
3472}
3473
3474static int32_t
3475unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3476    int32_t srcIndex=0;
3477    int32_t dstIndex=0;
3478    if(U_FAILURE(*status)){
3479        return 0;
3480    }
3481    if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3482        *status = U_ILLEGAL_ARGUMENT_ERROR;
3483        return 0;
3484    }
3485    if(srcLen==-1){
3486        srcLen = (int32_t)uprv_strlen(src);
3487    }
3488
3489    for (; srcIndex<srcLen; ) {
3490        UChar32 c = src[srcIndex++];
3491        if (c == 0x005C /*'\\'*/) {
3492            c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3493            if (c == (UChar32)0xFFFFFFFF) {
3494                *status=U_INVALID_CHAR_FOUND; /* return empty string */
3495                break; /* invalid escape sequence */
3496            }
3497        }
3498        if(dstIndex < dstLen){
3499            if(c>0xFFFF){
3500               dst[dstIndex++] = U16_LEAD(c);
3501               if(dstIndex<dstLen){
3502                    dst[dstIndex]=U16_TRAIL(c);
3503               }else{
3504                   *status=U_BUFFER_OVERFLOW_ERROR;
3505               }
3506            }else{
3507                dst[dstIndex]=(UChar)c;
3508            }
3509
3510        }else{
3511            *status = U_BUFFER_OVERFLOW_ERROR;
3512        }
3513        dstIndex++; /* for preflighting */
3514    }
3515    return dstIndex;
3516}
3517
3518static void
3519TestFullRoundtrip(const char* cp){
3520    UChar usource[10] ={0};
3521    UChar nsrc[10] = {0};
3522    uint32_t i=1;
3523    int len=0, ulen;
3524    nsrc[0]=0x0061;
3525    /* Test codepoint 0 */
3526    TestConv(usource,1,cp,"",NULL,0);
3527    TestConv(usource,2,cp,"",NULL,0);
3528    nsrc[2]=0x5555;
3529    TestConv(nsrc,3,cp,"",NULL,0);
3530
3531    for(;i<=0x10FFFF;i++){
3532        if(i==0xD800){
3533            i=0xDFFF;
3534            continue;
3535        }
3536        if(i<=0xFFFF){
3537            usource[0] =(UChar) i;
3538            len=1;
3539        }else{
3540            usource[0]=U16_LEAD(i);
3541            usource[1]=U16_TRAIL(i);
3542            len=2;
3543        }
3544        ulen=len;
3545        if(i==0x80) {
3546            usource[2]=0;
3547        }
3548        /* Test only single code points */
3549        TestConv(usource,ulen,cp,"",NULL,0);
3550        /* Test codepoint repeated twice */
3551        usource[ulen]=usource[0];
3552        usource[ulen+1]=usource[1];
3553        ulen+=len;
3554        TestConv(usource,ulen,cp,"",NULL,0);
3555        /* Test codepoint repeated 3 times */
3556        usource[ulen]=usource[0];
3557        usource[ulen+1]=usource[1];
3558        ulen+=len;
3559        TestConv(usource,ulen,cp,"",NULL,0);
3560        /* Test codepoint in between 2 codepoints */
3561        nsrc[1]=usource[0];
3562        nsrc[2]=usource[1];
3563        nsrc[len+1]=0x5555;
3564        TestConv(nsrc,len+2,cp,"",NULL,0);
3565        uprv_memset(usource,0,sizeof(UChar)*10);
3566    }
3567}
3568
3569static void
3570TestRoundTrippingAllUTF(void){
3571    if(!getTestOption(QUICK_OPTION)){
3572        log_verbose("Running exhaustive round trip test for BOCU-1\n");
3573        TestFullRoundtrip("BOCU-1");
3574        log_verbose("Running exhaustive round trip test for SCSU\n");
3575        TestFullRoundtrip("SCSU");
3576        log_verbose("Running exhaustive round trip test for UTF-8\n");
3577        TestFullRoundtrip("UTF-8");
3578        log_verbose("Running exhaustive round trip test for CESU-8\n");
3579        TestFullRoundtrip("CESU-8");
3580        log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3581        TestFullRoundtrip("UTF-16BE");
3582        log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3583        TestFullRoundtrip("UTF-16LE");
3584        log_verbose("Running exhaustive round trip test for UTF-16\n");
3585        TestFullRoundtrip("UTF-16");
3586        log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3587        TestFullRoundtrip("UTF-32BE");
3588        log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3589        TestFullRoundtrip("UTF-32LE");
3590        log_verbose("Running exhaustive round trip test for UTF-32\n");
3591        TestFullRoundtrip("UTF-32");
3592        log_verbose("Running exhaustive round trip test for UTF-7\n");
3593        TestFullRoundtrip("UTF-7");
3594        log_verbose("Running exhaustive round trip test for UTF-7\n");
3595        TestFullRoundtrip("UTF-7,version=1");
3596        log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3597        TestFullRoundtrip("IMAP-mailbox-name");
3598        /*
3599         *
3600         * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3601         * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3602         * The old mappings remain as fallbacks.
3603         * This test may be reintroduced at a later time.
3604         *
3605         * 110118 - mow
3606         */
3607         /*
3608         log_verbose("Running exhaustive round trip test for GB18030\n");
3609         TestFullRoundtrip("GB18030");
3610         */
3611    }
3612}
3613
/*
 * SCSU converter test.
 *
 * First, every escaped string in fTestCases is turned into UTF-16 with
 * unescape() and passed to TestConv() for an "SCSU" round trip (no expected
 * bytes).  Then fixed UTF-16 samples are passed to TestConv() together with
 * their SCSU byte sequences (german, russian, japanese, all-features), and
 * finally the monkeyIn sample is round-tripped without expected bytes.
 */
static void
TestSCSU() {

    /* UTF-16 text and the matching SCSU bytes, passed as a pair to TestConv() */
    static const uint16_t germanUTF16[]={
        0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
    };

    static const uint8_t germanSCSU[]={
        0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
    };

    static const uint16_t russianUTF16[]={
        0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
    };

    static const uint8_t russianSCSU[]={
        0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
    };

    static const uint16_t japaneseUTF16[]={
        0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
        0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
        0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
        0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
        0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
        0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
        0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
        0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
        0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
        0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
        0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
        0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
        0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
        0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
        0x307e, 0x3067, 0x3042, 0x308b, 0x3002
    };

    /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
     it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
    static const uint8_t japaneseSCSU[]={
        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
        0xcb, 0x82
    };

    static const uint16_t allFeaturesUTF16[]={
        0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
        0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
        0x01df, 0xf000, 0xdbff, 0xdfff
    };

    /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
     * result here (34B vs. 35B)
     */
    static const uint8_t allFeaturesSCSU[]={
        0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
        0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
        0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
        0xdf, 0x14, 0x80, 0x15, 0xff
    };
    /* larger mixed sample (BMP text, CR/LF pairs, surrogate pairs); round-tripped only, no expected bytes */
    static const uint16_t monkeyIn[]={
        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
        /* test non-BMP code points */
        0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
        0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
        0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
        0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
        0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
        0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
        0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
        0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,


        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
        0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
    };
    /* escaped strings; each one is expanded by unescape() before being round-tripped */
    static const char *fTestCases [] = {
          "\\ud800\\udc00", /* smallest surrogate*/
          "\\ud8ff\\udcff",
          "\\udBff\\udFff", /* largest surrogate pair*/
          "\\ud834\\udc00",
          "\\U0010FFFF",
          "Hello \\u9292 \\u9192 World!",
          "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",

          "\\u0648\\u06c8", /* catch missing reset*/
          "\\u0648\\u06c8",

          "\\u4444\\uE001", /* lowest quotable*/
          "\\u4444\\uf2FF", /* highest quotable*/
          "\\u4444\\uf188\\u4444",
          "\\u4444\\uf188\\uf288",
          "\\u4444\\uf188abc\\u0429\\uf288",
          "\\u9292\\u2222",
          "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
          "Hello World!123456",
          "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/

          "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
          "abc\\u4411d",      /* uses SQU*/
          "abc\\u4411\\u4412d",/* uses SCU*/
          "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
          "\\u9292\\u2222",
          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
          "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
          "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",

          "", /* empty input*/
          "\\u0000", /* smallest BMP character*/
          "\\uFFFF", /* largest BMP character*/

          /* regression tests*/
          "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
          "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
          "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
          "\\u0041\\u00df\\u0401\\u015f",
          "\\u9066\\u2123abc",
          /* NOTE(review): the next entry contains "\\u" immediately followed by another backslash;
           * presumably u_unescapeAt() rejects that escape, in which case unescape() stops there and
           * only the leading part of the string is round-tripped - confirm whether this is intended */
          "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
          "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
    };
    int i=0;
    /* Round-trip every escaped test string through SCSU. */
    for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
        const char* cSrc = fTestCases[i];
        UErrorCode status = U_ZERO_ERROR;
        int32_t cSrcLen,srcLen;
        UChar* src;
        /* UConverter* cnv = ucnv_open("SCSU",&status); */
        cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
        /* one UChar per source char plus one spare; the malloc result is not checked */
        src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
        /* srcLen becomes the count returned by unescape(); status is not examined afterwards */
        srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
        log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
        TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
        free(src);
    }
    /* Fixed samples with expected SCSU bytes; the all-features call is issued twice with identical arguments. */
    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
    TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
    TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
    TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
}
3792
3793#if !UCONFIG_NO_LEGACY_CONVERSION
3794static void TestJitterbug2346(){
3795    char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3796                      0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3797    uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3798
3799    UChar uTarget[500]={'\0'};
3800    UChar* utarget=uTarget;
3801    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3802
3803    char cTarget[500]={'\0'};
3804    char* ctarget=cTarget;
3805    char* ctargetLimit=cTarget+sizeof(cTarget);
3806    const char* csource=source;
3807    UChar* temp = expected;
3808    UErrorCode err=U_ZERO_ERROR;
3809
3810    UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3811    if(U_FAILURE(err)) {
3812        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3813        return;
3814    }
3815    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3816    if(U_FAILURE(err)) {
3817        log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3818        return;
3819    }
3820    utargetLimit=utarget;
3821    utarget = uTarget;
3822    while(utarget<utargetLimit){
3823        if(*temp!=*utarget){
3824
3825            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3826        }
3827        utarget++;
3828        temp++;
3829    }
3830    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3831    if(U_FAILURE(err)) {
3832        log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3833        return;
3834    }
3835    ctargetLimit=ctarget;
3836    ctarget =cTarget;
3837    ucnv_close(conv);
3838
3839
3840}
3841
3842static void
3843TestISO_2022_JP_1() {
3844    /* test input */
3845    static const uint16_t in[]={
3846        0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3847        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3848        0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3849        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3850        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3851        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3852        0x201D, 0x000D, 0x000A,
3853        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3854        0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3855        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3856        0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3857        0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3858        0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3859      };
3860    const UChar* uSource;
3861    const UChar* uSourceLimit;
3862    const char* cSource;
3863    const char* cSourceLimit;
3864    UChar *uTargetLimit =NULL;
3865    UChar *uTarget;
3866    char *cTarget;
3867    const char *cTargetLimit;
3868    char *cBuf;
3869    UChar *uBuf,*test;
3870    int32_t uBufSize = 120;
3871    UErrorCode errorCode=U_ZERO_ERROR;
3872    UConverter *cnv;
3873
3874    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3875    if(U_FAILURE(errorCode)) {
3876        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3877        return;
3878    }
3879
3880    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3881    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3882    uSource = (const UChar*)in;
3883    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3884    cTarget = cBuf;
3885    cTargetLimit = cBuf +uBufSize*5;
3886    uTarget = uBuf;
3887    uTargetLimit = uBuf+ uBufSize*5;
3888    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3889    if(U_FAILURE(errorCode)){
3890        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3891        return;
3892    }
3893    cSource = cBuf;
3894    cSourceLimit =cTarget;
3895    test =uBuf;
3896    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3897    if(U_FAILURE(errorCode)){
3898        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3899        return;
3900    }
3901    uSource = (const UChar*)in;
3902    while(uSource<uSourceLimit){
3903        if(*test!=*uSource){
3904
3905            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3906        }
3907        uSource++;
3908        test++;
3909    }
3910    /*ucnv_close(cnv);
3911    cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3912    /*Test for the condition where there is an invalid character*/
3913    ucnv_reset(cnv);
3914    {
3915        static const uint8_t source2[]={0x0e,0x24,0x053};
3916        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3917    }
3918    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3919    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3920    ucnv_close(cnv);
3921    free(uBuf);
3922    free(cBuf);
3923}
3924
3925static void
3926TestISO_2022_JP_2() {
3927    /* test input */
3928    static const uint16_t in[]={
3929        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3930        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3931        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3932        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3933        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3934        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3935        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3936        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3937        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3938        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3939        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3940        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3941        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3942        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3943        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3944        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3945        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3946        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3947        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3948      };
3949    const UChar* uSource;
3950    const UChar* uSourceLimit;
3951    const char* cSource;
3952    const char* cSourceLimit;
3953    UChar *uTargetLimit =NULL;
3954    UChar *uTarget;
3955    char *cTarget;
3956    const char *cTargetLimit;
3957    char *cBuf;
3958    UChar *uBuf,*test;
3959    int32_t uBufSize = 120;
3960    UErrorCode errorCode=U_ZERO_ERROR;
3961    UConverter *cnv;
3962    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3963    int32_t* myOff= offsets;
3964    cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3965    if(U_FAILURE(errorCode)) {
3966        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3967        return;
3968    }
3969
3970    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3971    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3972    uSource = (const UChar*)in;
3973    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3974    cTarget = cBuf;
3975    cTargetLimit = cBuf +uBufSize*5;
3976    uTarget = uBuf;
3977    uTargetLimit = uBuf+ uBufSize*5;
3978    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3979    if(U_FAILURE(errorCode)){
3980        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3981        return;
3982    }
3983    cSource = cBuf;
3984    cSourceLimit =cTarget;
3985    test =uBuf;
3986    myOff=offsets;
3987    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3988    if(U_FAILURE(errorCode)){
3989        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3990        return;
3991    }
3992    uSource = (const UChar*)in;
3993    while(uSource<uSourceLimit){
3994        if(*test!=*uSource){
3995
3996            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3997        }
3998        uSource++;
3999        test++;
4000    }
4001    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4002    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4003    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4004    /*Test for the condition where there is an invalid character*/
4005    ucnv_reset(cnv);
4006    {
4007        static const uint8_t source2[]={0x0e,0x24,0x053};
4008        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4009    }
4010    ucnv_close(cnv);
4011    free(uBuf);
4012    free(cBuf);
4013    free(offsets);
4014}
4015
4016static void
4017TestISO_2022_KR() {
4018    /* test input */
4019    static const uint16_t in[]={
4020                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4021                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4022                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4023                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4024                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4025                   ,0x53E3,0x53E4,0x000A,0x000D};
4026    const UChar* uSource;
4027    const UChar* uSourceLimit;
4028    const char* cSource;
4029    const char* cSourceLimit;
4030    UChar *uTargetLimit =NULL;
4031    UChar *uTarget;
4032    char *cTarget;
4033    const char *cTargetLimit;
4034    char *cBuf;
4035    UChar *uBuf,*test;
4036    int32_t uBufSize = 120;
4037    UErrorCode errorCode=U_ZERO_ERROR;
4038    UConverter *cnv;
4039    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4040    int32_t* myOff= offsets;
4041    cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4042    if(U_FAILURE(errorCode)) {
4043        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4044        return;
4045    }
4046
4047    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4048    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4049    uSource = (const UChar*)in;
4050    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4051    cTarget = cBuf;
4052    cTargetLimit = cBuf +uBufSize*5;
4053    uTarget = uBuf;
4054    uTargetLimit = uBuf+ uBufSize*5;
4055    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4056    if(U_FAILURE(errorCode)){
4057        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4058        return;
4059    }
4060    cSource = cBuf;
4061    cSourceLimit =cTarget;
4062    test =uBuf;
4063    myOff=offsets;
4064    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4065    if(U_FAILURE(errorCode)){
4066        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4067        return;
4068    }
4069    uSource = (const UChar*)in;
4070    while(uSource<uSourceLimit){
4071        if(*test!=*uSource){
4072            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4073        }
4074        uSource++;
4075        test++;
4076    }
4077    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4078    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4079    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4080    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4081    TestJitterbug930("csISO2022KR");
4082    /*Test for the condition where there is an invalid character*/
4083    ucnv_reset(cnv);
4084    {
4085        static const uint8_t source2[]={0x1b,0x24,0x053};
4086        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4087        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4088    }
4089    ucnv_close(cnv);
4090    free(uBuf);
4091    free(cBuf);
4092    free(offsets);
4093}
4094
4095static void
4096TestISO_2022_KR_1() {
4097    /* test input */
4098    static const uint16_t in[]={
4099                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4100                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4101                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4102                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4103                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4104                   ,0x53E3,0x53E4,0x000A,0x000D};
4105    const UChar* uSource;
4106    const UChar* uSourceLimit;
4107    const char* cSource;
4108    const char* cSourceLimit;
4109    UChar *uTargetLimit =NULL;
4110    UChar *uTarget;
4111    char *cTarget;
4112    const char *cTargetLimit;
4113    char *cBuf;
4114    UChar *uBuf,*test;
4115    int32_t uBufSize = 120;
4116    UErrorCode errorCode=U_ZERO_ERROR;
4117    UConverter *cnv;
4118    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4119    int32_t* myOff= offsets;
4120    cnv=ucnv_open("ibm-25546", &errorCode);
4121    if(U_FAILURE(errorCode)) {
4122        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4123        return;
4124    }
4125
4126    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4127    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4128    uSource = (const UChar*)in;
4129    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4130    cTarget = cBuf;
4131    cTargetLimit = cBuf +uBufSize*5;
4132    uTarget = uBuf;
4133    uTargetLimit = uBuf+ uBufSize*5;
4134    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4135    if(U_FAILURE(errorCode)){
4136        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4137        return;
4138    }
4139    cSource = cBuf;
4140    cSourceLimit =cTarget;
4141    test =uBuf;
4142    myOff=offsets;
4143    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4144    if(U_FAILURE(errorCode)){
4145        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4146        return;
4147    }
4148    uSource = (const UChar*)in;
4149    while(uSource<uSourceLimit){
4150        if(*test!=*uSource){
4151            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4152        }
4153        uSource++;
4154        test++;
4155    }
4156    ucnv_reset(cnv);
4157    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4158    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4159    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4160    ucnv_reset(cnv);
4161    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4162        /*Test for the condition where there is an invalid character*/
4163    ucnv_reset(cnv);
4164    {
4165        static const uint8_t source2[]={0x1b,0x24,0x053};
4166        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4167        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4168    }
4169    ucnv_close(cnv);
4170    free(uBuf);
4171    free(cBuf);
4172    free(offsets);
4173}
4174
4175static void TestJitterbug2411(){
4176    static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4177                         "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4178    UConverter* kr=NULL, *kr1=NULL;
4179    UErrorCode errorCode = U_ZERO_ERROR;
4180    UChar tgt[100]={'\0'};
4181    UChar* target = tgt;
4182    UChar* targetLimit = target+100;
4183    kr=ucnv_open("iso-2022-kr", &errorCode);
4184    if(U_FAILURE(errorCode)) {
4185        log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4186        return;
4187    }
4188    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4189    if(U_FAILURE(errorCode)) {
4190        log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4191        return;
4192    }
4193    kr1 = ucnv_open("ibm-25546", &errorCode);
4194    if(U_FAILURE(errorCode)) {
4195        log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4196        return;
4197    }
4198    target = tgt;
4199    targetLimit = target+100;
4200    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4201
4202    if(U_FAILURE(errorCode)) {
4203        log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4204        return;
4205    }
4206
4207    ucnv_close(kr);
4208    ucnv_close(kr1);
4209
4210}
4211
4212static void
4213TestJIS(){
4214    /* From Unicode moved to testdata/conversion.txt */
4215    /*To Unicode*/
4216    {
4217        static const uint8_t sampleTextJIS[] = {
4218            0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4219            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4220            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4221        };
4222        static const uint16_t expectedISO2022JIS[] = {
4223            0x0041, 0x0042,
4224            0xFF81, 0xFF82,
4225            0x3000
4226        };
4227        static const int32_t  toISO2022JISOffs[]={
4228            3,4,
4229            8,9,
4230            16
4231        };
4232
4233        static const uint8_t sampleTextJIS7[] = {
4234            0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4235            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4236            0x1b,0x24,0x42,0x21,0x21,
4237            0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4238            0x21,0x22,
4239            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4240        };
4241        static const uint16_t expectedISO2022JIS7[] = {
4242            0x0041, 0x0042,
4243            0xFF81, 0xFF82,
4244            0x3000,
4245            0xFF81, 0xFF82,
4246            0x3001,
4247            0x3000
4248        };
4249        static const int32_t  toISO2022JIS7Offs[]={
4250            3,4,
4251            8,9,
4252            13,16,
4253            17,
4254            19,27
4255        };
4256        static const uint8_t sampleTextJIS8[] = {
4257            0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4258            0xa1,0xc8,0xd9,/*Katakana Set*/
4259            0x1b,0x28,0x42,
4260            0x41,0x42,
4261            0xb1,0xc3, /*Katakana Set*/
4262            0x1b,0x24,0x42,0x21,0x21
4263        };
4264        static const uint16_t expectedISO2022JIS8[] = {
4265            0x0041, 0x0042,
4266            0xff61, 0xff88, 0xff99,
4267            0x0041, 0x0042,
4268            0xff71, 0xff83,
4269            0x3000
4270        };
4271        static const int32_t  toISO2022JIS8Offs[]={
4272            3, 4,  5,  6,
4273            7, 11, 12, 13,
4274            14, 18,
4275        };
4276
4277        testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4278            sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4279        testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4280            sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4281        testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4282            sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4283    }
4284
4285}
4286
4287
4288#if 0
4289 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4290
4291static void TestJitterbug915(){
4292/* tests for roundtripping of the below sequence
4293\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4294\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4295\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4296\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4297\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4298\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4299\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4300*/
4301    static const char cSource[]={
4302        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4303        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4304        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4305        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4306        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4307        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4308        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4309        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4310        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4311        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4312        0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4313        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4314        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4315        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4316        0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4317        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4318        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4319        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4320        0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4321        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4322        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4323        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4324        0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4325        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4326        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4327        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4328        0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4329        0x37, 0x20, 0x2A, 0x2F
4330    };
4331    UChar uTarget[500]={'\0'};
4332    UChar* utarget=uTarget;
4333    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4334
4335    char cTarget[500]={'\0'};
4336    char* ctarget=cTarget;
4337    char* ctargetLimit=cTarget+sizeof(cTarget);
4338    const char* csource=cSource;
4339    const char* tempSrc = cSource;
4340    UErrorCode err=U_ZERO_ERROR;
4341
4342    UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4343    if(U_FAILURE(err)) {
4344        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4345        return;
4346    }
4347    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4348    if(U_FAILURE(err)) {
4349        log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4350        return;
4351    }
4352    utargetLimit=utarget;
4353    utarget = uTarget;
4354    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4355    if(U_FAILURE(err)) {
4356        log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4357        return;
4358    }
4359    ctargetLimit=ctarget;
4360    ctarget =cTarget;
4361    while(ctarget<ctargetLimit){
4362        if(*ctarget != *tempSrc){
4363            log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4364        }
4365        ++ctarget;
4366        ++tempSrc;
4367    }
4368
4369    ucnv_close(conv);
4370}
4371
4372static void
4373TestISO_2022_CN_EXT() {
4374    /* test input */
4375    static const uint16_t in[]={
4376                /* test Non-BMP code points */
4377         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4378         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4379         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4380         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4381         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4382         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4383         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4384         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4385         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4386         0xD869, 0xDED5,
4387
4388         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4389         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4390         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4391         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4392         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4393         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4394         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4395         0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4396         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4397         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4398         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4399         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4400         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4401         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4402         0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4403         0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4404         0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4405         0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4406
4407         0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4408
4409      };
4410
4411    const UChar* uSource;
4412    const UChar* uSourceLimit;
4413    const char* cSource;
4414    const char* cSourceLimit;
4415    UChar *uTargetLimit =NULL;
4416    UChar *uTarget;
4417    char *cTarget;
4418    const char *cTargetLimit;
4419    char *cBuf;
4420    UChar *uBuf,*test;
4421    int32_t uBufSize = 180;
4422    UErrorCode errorCode=U_ZERO_ERROR;
4423    UConverter *cnv;
4424    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4425    int32_t* myOff= offsets;
4426    cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4427    if(U_FAILURE(errorCode)) {
4428        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4429        return;
4430    }
4431
4432    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4433    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4434    uSource = (const UChar*)in;
4435    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4436    cTarget = cBuf;
4437    cTargetLimit = cBuf +uBufSize*5;
4438    uTarget = uBuf;
4439    uTargetLimit = uBuf+ uBufSize*5;
4440    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4441    if(U_FAILURE(errorCode)){
4442        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4443        return;
4444    }
4445    cSource = cBuf;
4446    cSourceLimit =cTarget;
4447    test =uBuf;
4448    myOff=offsets;
4449    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4450    if(U_FAILURE(errorCode)){
4451        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4452        return;
4453    }
4454    uSource = (const UChar*)in;
4455    while(uSource<uSourceLimit){
4456        if(*test!=*uSource){
4457            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4458        }
4459        else{
4460            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4461        }
4462        uSource++;
4463        test++;
4464    }
4465    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4466    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4467    /*Test for the condition where there is an invalid character*/
4468    ucnv_reset(cnv);
4469    {
4470        static const uint8_t source2[]={0x0e,0x24,0x053};
4471        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4472    }
4473    ucnv_close(cnv);
4474    free(uBuf);
4475    free(cBuf);
4476    free(offsets);
4477}
4478#endif
4479
4480static void
4481TestISO_2022_CN() {
4482    /* test input */
4483    static const uint16_t in[]={
4484         /* jitterbug 951 */
4485         0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4486         0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4487         0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4488         0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4489         0x0020, 0x0045, 0x004e, 0x0044,
4490         /**/
4491         0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4492         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4493         0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4494         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4495         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4496         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4497         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4498         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4499         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4500         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4501         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4502         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4503         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4504         0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4505         0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4506         0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4507         0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4508
4509      };
4510    const UChar* uSource;
4511    const UChar* uSourceLimit;
4512    const char* cSource;
4513    const char* cSourceLimit;
4514    UChar *uTargetLimit =NULL;
4515    UChar *uTarget;
4516    char *cTarget;
4517    const char *cTargetLimit;
4518    char *cBuf;
4519    UChar *uBuf,*test;
4520    int32_t uBufSize = 180;
4521    UErrorCode errorCode=U_ZERO_ERROR;
4522    UConverter *cnv;
4523    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4524    int32_t* myOff= offsets;
4525    cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4526    if(U_FAILURE(errorCode)) {
4527        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4528        return;
4529    }
4530
4531    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4532    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4533    uSource = (const UChar*)in;
4534    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4535    cTarget = cBuf;
4536    cTargetLimit = cBuf +uBufSize*5;
4537    uTarget = uBuf;
4538    uTargetLimit = uBuf+ uBufSize*5;
4539    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4540    if(U_FAILURE(errorCode)){
4541        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4542        return;
4543    }
4544    cSource = cBuf;
4545    cSourceLimit =cTarget;
4546    test =uBuf;
4547    myOff=offsets;
4548    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4549    if(U_FAILURE(errorCode)){
4550        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4551        return;
4552    }
4553    uSource = (const UChar*)in;
4554    while(uSource<uSourceLimit){
4555        if(*test!=*uSource){
4556            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4557        }
4558        else{
4559            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4560        }
4561        uSource++;
4562        test++;
4563    }
4564    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4565    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4566    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4567    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4568    TestJitterbug930("csISO2022CN");
4569    /*Test for the condition where there is an invalid character*/
4570    ucnv_reset(cnv);
4571    {
4572        static const uint8_t source2[]={0x0e,0x24,0x053};
4573        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4574    }
4575
4576    ucnv_close(cnv);
4577    free(uBuf);
4578    free(cBuf);
4579    free(offsets);
4580}
4581
4582/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
/* One empty-segment test case: which converter to open and the raw bytes to feed it. */
typedef struct {
    const char *    converterName;      /* name passed to ucnv_open(); NULL marks the end of the table */
    const char *    inputText;          /* input bytes for ucnv_toUnicode() */
    int             inputTextLength;    /* number of bytes in inputText */
} EmptySegmentTest;
4588
4589/* Callback for TestJitterbug6175, should only get called for empty segment errors */
4590static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4591                                             int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4592    if (reason > UCNV_IRREGULAR) {
4593        return;
4594    }
4595    if (reason != UCNV_IRREGULAR) {
4596        log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4597    }
4598    /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4599    *err = U_ZERO_ERROR;
4600    ucnv_cbToUWriteSub(toArgs,0,err);
4601}
4602
4603enum { kEmptySegmentToUCharsMax = 64 };
4604static void TestJitterbug6175(void) {
4605    static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4606    static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4607    static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4608    static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4609    static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4610    static const EmptySegmentTest emptySegmentTests[] = {
4611        /* converterName inputText    inputTextLength */
4612        { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4613        { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4614        { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4615        { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4616        { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4617        /* terminator: */
4618        { NULL,          NULL,        0,                  }
4619    };
4620    const EmptySegmentTest * testPtr;
4621    for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4622        UErrorCode   err = U_ZERO_ERROR;
4623        UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4624        if (U_FAILURE(err)) {
4625            log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4626            return;
4627        }
4628        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4629        if (U_FAILURE(err)) {
4630            log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4631            ucnv_close(cnv);
4632            return;
4633        }
4634        {
4635            UChar         toUChars[kEmptySegmentToUCharsMax];
4636            UChar *       toUCharsPtr = toUChars;
4637            const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4638            const char *  inCharsPtr = testPtr->inputText;
4639            const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4640            ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4641        }
4642        ucnv_close(cnv);
4643    }
4644}
4645
4646static void
4647TestEBCDIC_STATEFUL() {
4648    /* test input */
4649    static const uint8_t in[]={
4650        0x61,
4651        0x1a,
4652        0x0f, 0x4b,
4653        0x42,
4654        0x40,
4655        0x36,
4656    };
4657
4658    /* expected test results */
4659    static const int32_t results[]={
4660        /* number of bytes read, code point */
4661        1, 0x002f,
4662        1, 0x0092,
4663        2, 0x002e,
4664        1, 0xff62,
4665        1, 0x0020,
4666        1, 0x0096,
4667
4668    };
4669    static const uint8_t in2[]={
4670        0x0f,
4671        0xa1,
4672        0x01
4673    };
4674
4675    /* expected test results */
4676    static const int32_t results2[]={
4677        /* number of bytes read, code point */
4678        2, 0x203E,
4679        1, 0x0001,
4680    };
4681
4682    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4683    UErrorCode errorCode=U_ZERO_ERROR;
4684    UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4685    if(U_FAILURE(errorCode)) {
4686        log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4687        return;
4688    }
4689    TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4690    ucnv_reset(cnv);
4691     /* Test the condition when source >= sourceLimit */
4692    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4693    ucnv_reset(cnv);
4694    /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4695    {
4696        static const uint8_t source1[]={0x0f};
4697        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4698    }
4699    /*Test for the condition where there is an invalid character*/
4700    ucnv_reset(cnv);
4701    {
4702        static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4703        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4704    }
4705    ucnv_reset(cnv);
4706    source=(const char*)in2;
4707    limit=(const char*)in2+sizeof(in2);
4708    TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4709    ucnv_close(cnv);
4710
4711}
4712
4713static void
4714TestGB18030() {
4715    /* test input */
4716    static const uint8_t in[]={
4717        0x24,
4718        0x7f,
4719        0x81, 0x30, 0x81, 0x30,
4720        0xa8, 0xbf,
4721        0xa2, 0xe3,
4722        0xd2, 0xbb,
4723        0x82, 0x35, 0x8f, 0x33,
4724        0x84, 0x31, 0xa4, 0x39,
4725        0x90, 0x30, 0x81, 0x30,
4726        0xe3, 0x32, 0x9a, 0x35
4727#if 0
4728        /*
4729         * Feature removed   markus 2000-oct-26
4730         * Only some codepages must match surrogate pairs into supplementary code points -
4731         * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4732         * GB 18030 provides direct encodings for supplementary code points, therefore
4733         * it must not combine two single-encoded surrogates into one code point.
4734         */
4735        0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4736#endif
4737    };
4738
4739    /* expected test results */
4740    static const int32_t results[]={
4741        /* number of bytes read, code point */
4742        1, 0x24,
4743        1, 0x7f,
4744        4, 0x80,
4745        2, 0x1f9,
4746        2, 0x20ac,
4747        2, 0x4e00,
4748        4, 0x9fa6,
4749        4, 0xffff,
4750        4, 0x10000,
4751        4, 0x10ffff
4752#if 0
4753        /* Feature removed. See comment above. */
4754        8, 0x10000
4755#endif
4756    };
4757
4758/*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4759    UErrorCode errorCode=U_ZERO_ERROR;
4760    UConverter *cnv=ucnv_open("gb18030", &errorCode);
4761    if(U_FAILURE(errorCode)) {
4762        log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4763        return;
4764    }
4765    TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4766    ucnv_close(cnv);
4767}
4768
4769static void
4770TestLMBCS() {
4771    /* LMBCS-1 string */
4772    static const uint8_t pszLMBCS[]={
4773        0x61,
4774        0x01, 0x29,
4775        0x81,
4776        0xA0,
4777        0x0F, 0x27,
4778        0x0F, 0x91,
4779        0x14, 0x0a, 0x74,
4780        0x14, 0xF6, 0x02,
4781        0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4782        0x10, 0x88, 0xA0,
4783    };
4784
4785    /* Unicode UChar32 equivalents */
4786    static const UChar32 pszUnicode32[]={
4787        /* code point */
4788        0x00000061,
4789        0x00002013,
4790        0x000000FC,
4791        0x000000E1,
4792        0x00000007,
4793        0x00000091,
4794        0x00000a74,
4795        0x00000200,
4796        0x00023456, /* code point for surrogate pair */
4797        0x00005516
4798    };
4799
4800/* Unicode UChar equivalents */
4801    static const UChar pszUnicode[]={
4802        /* code point */
4803        0x0061,
4804        0x2013,
4805        0x00FC,
4806        0x00E1,
4807        0x0007,
4808        0x0091,
4809        0x0a74,
4810        0x0200,
4811        0xD84D, /* low surrogate */
4812        0xDC56, /* high surrogate */
4813        0x5516
4814    };
4815
4816/* expected test results */
4817    static const int offsets32[]={
4818        /* number of bytes read, code point */
4819        0,
4820        1,
4821        3,
4822        4,
4823        5,
4824        7,
4825        9,
4826        12,
4827        15,
4828        21,
4829        24
4830    };
4831
4832/* expected test results */
4833    static const int offsets[]={
4834        /* number of bytes read, code point */
4835        0,
4836        1,
4837        3,
4838        4,
4839        5,
4840        7,
4841        9,
4842        12,
4843        15,
4844        18,
4845        21,
4846        24
4847    };
4848
4849
4850    UConverter *cnv;
4851
4852#define NAME_LMBCS_1 "LMBCS-1"
4853#define NAME_LMBCS_2 "LMBCS-2"
4854
4855
4856   /* Some basic open/close/property tests on some LMBCS converters */
4857    {
4858
4859      char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4860      char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4861      char get_subchars [1];
4862      const char * get_name;
4863      UConverter *cnv1;
4864      UConverter *cnv2;
4865
4866      int8_t len = sizeof(get_subchars);
4867
4868      UErrorCode errorCode=U_ZERO_ERROR;
4869
4870      /* Open */
4871      cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4872      if(U_FAILURE(errorCode)) {
4873         log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4874         return;
4875      }
4876      cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4877      if(U_FAILURE(errorCode)) {
4878         log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4879         return;
4880      }
4881
4882      /* Name */
4883      get_name = ucnv_getName (cnv1, &errorCode);
4884      if (strcmp(NAME_LMBCS_1,get_name)){
4885         log_err("Unexpected converter name: %s\n", get_name);
4886      }
4887      get_name = ucnv_getName (cnv2, &errorCode);
4888      if (strcmp(NAME_LMBCS_2,get_name)){
4889         log_err("Unexpected converter name: %s\n", get_name);
4890      }
4891
4892      /* substitution chars */
4893      ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4894      if(U_FAILURE(errorCode)) {
4895         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4896      }
4897      if (len!=1){
4898         log_err("Unexpected length of sub chars\n");
4899      }
4900      if (get_subchars[0] != expected_subchars[0]){
4901           log_err("Unexpected value of sub chars\n");
4902      }
4903      ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4904      if(U_FAILURE(errorCode)) {
4905         log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4906      }
4907      ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4908      if(U_FAILURE(errorCode)) {
4909         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4910      }
4911      if (len!=1){
4912         log_err("Unexpected length of sub chars\n");
4913      }
4914      if (get_subchars[0] != new_subchars[0]){
4915           log_err("Unexpected value of sub chars\n");
4916      }
4917      ucnv_close(cnv1);
4918      ucnv_close(cnv2);
4919
4920    }
4921
4922    /* LMBCS to Unicode - offsets */
4923    {
4924       UErrorCode errorCode=U_ZERO_ERROR;
4925
4926       const char * pSource = (const char *)pszLMBCS;
4927       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4928
4929       UChar Out [sizeof(pszUnicode) + 1];
4930       UChar * pOut = Out;
4931       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4932
4933       int32_t off [sizeof(offsets)];
4934
4935      /* last 'offset' in expected results is just the final size.
4936         (Makes other tests easier). Compensate here: */
4937
4938       off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4939
4940
4941
4942      cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4943      if(U_FAILURE(errorCode)) {
4944           log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4945           return;
4946      }
4947
4948
4949
4950      ucnv_toUnicode (cnv,
4951                      &pOut,
4952                      OutLimit,
4953                      &pSource,
4954                      sourceLimit,
4955                      off,
4956                      TRUE,
4957                      &errorCode);
4958
4959
4960       if (memcmp(off,offsets,sizeof(offsets)))
4961       {
4962         log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4963       }
4964       if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4965       {
4966         log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4967       }
4968       ucnv_close(cnv);
4969    }
4970    {
4971   /* LMBCS to Unicode - getNextUChar */
4972      const char * sourceStart;
4973      const char *source=(const char *)pszLMBCS;
4974      const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4975      const UChar32 *results= pszUnicode32;
4976      const int *off = offsets32;
4977
4978      UErrorCode errorCode=U_ZERO_ERROR;
4979      UChar32 uniChar;
4980
4981      cnv=ucnv_open("LMBCS-1", &errorCode);
4982      if(U_FAILURE(errorCode)) {
4983           log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4984           return;
4985      }
4986      else
4987      {
4988
4989         while(source<limit) {
4990            sourceStart=source;
4991            uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4992            if(U_FAILURE(errorCode)) {
4993                  log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4994                  break;
4995            } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4996               log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4997                   uniChar, (source-sourceStart), *results, *off);
4998               break;
4999            }
5000            results++;
5001            off++;
5002         }
5003       }
5004       ucnv_close(cnv);
5005    }
5006    { /* test locale & optimization group operations: Unicode to LMBCS */
5007
5008      UErrorCode errorCode=U_ZERO_ERROR;
5009      UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5010      UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5011      UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5012      UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5013      const UChar * pUniOut = uniString;
5014      UChar * pUniIn = uniString;
5015      uint8_t lmbcsString [4];
5016      const char * pLMBCSOut = (const char *)lmbcsString;
5017      char * pLMBCSIn = (char *)lmbcsString;
5018
5019      /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5020      ucnv_fromUnicode (cnv16he,
5021                        &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
5022                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
5023                        NULL, 1, &errorCode);
5024
5025      if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5026      {
5027         log_err("LMBCS-16,locale=he gives unexpected translation\n");
5028      }
5029
5030      pLMBCSIn= (char *)lmbcsString;
5031      pUniOut = uniString;
5032      ucnv_fromUnicode (cnv01us,
5033                        &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
5034                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
5035                        NULL, 1, &errorCode);
5036
5037      if (lmbcsString[0] != 0x9F)
5038      {
5039         log_err("LMBCS-1,locale=US gives unexpected translation\n");
5040      }
5041
5042      /* single byte char from mbcs char set */
5043      lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5044      pLMBCSOut = (const char *)lmbcsString;
5045      pUniIn = uniString;
5046      ucnv_toUnicode (cnv16jp,
5047                        &pUniIn, pUniIn + 1,
5048                        &pLMBCSOut, (pLMBCSOut + 1),
5049                        NULL, 1, &errorCode);
5050      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5051      {
5052           log_err("Unexpected results from LMBCS-16 single byte char\n");
5053      }
5054      /* convert to group 1: should be 3 bytes */
5055      pLMBCSIn = (char *)lmbcsString;
5056      pUniOut = uniString;
5057      ucnv_fromUnicode (cnv01us,
5058                        &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5059                        &pUniOut, pUniOut + 1,
5060                        NULL, 1, &errorCode);
5061      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5062         || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5063      {
5064           log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5065      }
5066      pLMBCSOut = (const char *)lmbcsString;
5067      pUniIn = uniString;
5068      ucnv_toUnicode (cnv01us,
5069                        &pUniIn, pUniIn + 1,
5070                        &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5071                        NULL, 1, &errorCode);
5072      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5073      {
5074           log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5075      }
5076      pLMBCSIn = (char *)lmbcsString;
5077      pUniOut = uniString;
5078      ucnv_fromUnicode (cnv16jp,
5079                        &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5080                        &pUniOut, pUniOut + 1,
5081                        NULL, 1, &errorCode);
5082      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5083      {
5084           log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5085      }
5086      ucnv_close(cnv16he);
5087      ucnv_close(cnv16jp);
5088      ucnv_close(cnv01us);
5089    }
5090    {
5091       /* Small source buffer testing, LMBCS -> Unicode */
5092
5093       UErrorCode errorCode=U_ZERO_ERROR;
5094
5095       const char * pSource = (const char *)pszLMBCS;
5096       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5097       int codepointCount = 0;
5098
5099       UChar Out [sizeof(pszUnicode) + 1];
5100       UChar * pOut = Out;
5101       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
5102
5103
5104       cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5105       if(U_FAILURE(errorCode)) {
5106           log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5107           return;
5108       }
5109
5110
5111       while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5112       {
5113           ucnv_toUnicode (cnv,
5114               &pOut,
5115               OutLimit,
5116               &pSource,
5117               (pSource+1), /* claim that this is a 1- byte buffer */
5118               NULL,
5119               FALSE,    /* FALSE means there might be more chars in the next buffer */
5120               &errorCode);
5121
5122           if (U_SUCCESS (errorCode))
5123           {
5124               if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5125               {
5126                   /* we are on to the next code point: check value */
5127
5128                   if (Out[0] != pszUnicode[codepointCount]){
5129                       log_err("LMBCS->Uni result %lx should have been %lx \n",
5130                           Out[0], pszUnicode[codepointCount]);
5131                   }
5132
5133                   pOut = Out; /* reset for accumulating next code point */
5134                   codepointCount++;
5135               }
5136           }
5137           else
5138           {
5139               log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5140           }
5141       }
5142       {
5143         /* limits & surrogate error testing */
5144         char LIn [sizeof(pszLMBCS)];
5145         const char * pLIn = LIn;
5146
5147         char LOut [sizeof(pszLMBCS)];
5148         char * pLOut = LOut;
5149
5150         UChar UOut [sizeof(pszUnicode)];
5151         UChar * pUOut = UOut;
5152
5153         UChar UIn [sizeof(pszUnicode)];
5154         const UChar * pUIn = UIn;
5155
5156         int32_t off [sizeof(offsets)];
5157         UChar32 uniChar;
5158
5159         errorCode=U_ZERO_ERROR;
5160
5161         /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5162         pUIn++;
5163         ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5164         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5165         {
5166            log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5167         }
5168         pUIn--;
5169
5170         errorCode=U_ZERO_ERROR;
5171         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5172         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5173         {
5174            log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5175         }
5176         errorCode=U_ZERO_ERROR;
5177
5178         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5179         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5180         {
5181            log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5182         }
5183         errorCode=U_ZERO_ERROR;
5184
5185         /* 0 byte source request - no error, no pointer movement */
5186         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5187         ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5188         if(U_FAILURE(errorCode)) {
5189            log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5190         }
5191         if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5192         {
5193              log_err("Unexpected pointer move in 0 byte source request \n");
5194         }
5195         /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5196         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5197         if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5198         {
5199            log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5200         }
5201         if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5202         {
5203            log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5204         }
5205         errorCode = U_ZERO_ERROR;
5206
5207         /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5208
5209         pUIn = pszUnicode;
5210         ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
5211         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5212         {
5213            log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5214         }
5215
5216         errorCode = U_ZERO_ERROR;
5217
5218         pLIn = (const char *)pszLMBCS;
5219         ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5220         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5221         {
5222            log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5223         }
5224
5225         /* unpaired or chopped LMBCS surrogates */
5226
5227         /* OK high surrogate, Low surrogate is chopped */
5228         LIn [0] = (char)0x14;
5229         LIn [1] = (char)0xD8;
5230         LIn [2] = (char)0x01;
5231         LIn [3] = (char)0x14;
5232         LIn [4] = (char)0xDC;
5233         pLIn = LIn;
5234         errorCode = U_ZERO_ERROR;
5235         pUOut = UOut;
5236
5237         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5238         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5239         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5240         {
5241            log_err("Unexpected results on chopped low surrogate\n");
5242         }
5243
5244         /* chopped at surrogate boundary */
5245         LIn [0] = (char)0x14;
5246         LIn [1] = (char)0xD8;
5247         LIn [2] = (char)0x01;
5248         pLIn = LIn;
5249         errorCode = U_ZERO_ERROR;
5250         pUOut = UOut;
5251
5252         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5253         if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5254         {
5255            log_err("Unexpected results on chopped at surrogate boundary \n");
5256         }
5257
5258         /* unpaired surrogate plus valid Unichar */
5259         LIn [0] = (char)0x14;
5260         LIn [1] = (char)0xD8;
5261         LIn [2] = (char)0x01;
5262         LIn [3] = (char)0x14;
5263         LIn [4] = (char)0xC9;
5264         LIn [5] = (char)0xD0;
5265         pLIn = LIn;
5266         errorCode = U_ZERO_ERROR;
5267         pUOut = UOut;
5268
5269         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5270         if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5271         {
5272            log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5273         }
5274
5275      /* unpaired surrogate plus chopped Unichar */
5276         LIn [0] = (char)0x14;
5277         LIn [1] = (char)0xD8;
5278         LIn [2] = (char)0x01;
5279         LIn [3] = (char)0x14;
5280         LIn [4] = (char)0xC9;
5281
5282         pLIn = LIn;
5283         errorCode = U_ZERO_ERROR;
5284         pUOut = UOut;
5285
5286         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5287         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5288         {
5289            log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5290         }
5291
5292         /* unpaired surrogate plus valid non-Unichar */
5293         LIn [0] = (char)0x14;
5294         LIn [1] = (char)0xD8;
5295         LIn [2] = (char)0x01;
5296         LIn [3] = (char)0x0F;
5297         LIn [4] = (char)0x3B;
5298
5299         pLIn = LIn;
5300         errorCode = U_ZERO_ERROR;
5301         pUOut = UOut;
5302
5303         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5304         if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5305         {
5306            log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5307         }
5308
5309         /* unpaired surrogate plus chopped non-Unichar */
5310         LIn [0] = (char)0x14;
5311         LIn [1] = (char)0xD8;
5312         LIn [2] = (char)0x01;
5313         LIn [3] = (char)0x0F;
5314
5315         pLIn = LIn;
5316         errorCode = U_ZERO_ERROR;
5317         pUOut = UOut;
5318
5319         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5320
5321         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5322         {
5323            log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5324         }
5325       }
5326    }
5327   ucnv_close(cnv);  /* final cleanup */
5328}
5329
5330
5331static void TestJitterbug255()
5332{
5333    static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5334    const char *testBuffer = (const char *)testBytes;
5335    const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5336    UErrorCode status = U_ZERO_ERROR;
5337    /*UChar32 result;*/
5338    UConverter *cnv = 0;
5339
5340    cnv = ucnv_open("shift-jis", &status);
5341    if (U_FAILURE(status) || cnv == 0) {
5342        log_data_err("Failed to open the converter for SJIS.\n");
5343                return;
5344    }
5345    while (testBuffer != testEnd)
5346    {
5347        /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5348        if (U_FAILURE(status))
5349        {
5350            log_err("Failed to convert the next UChar for SJIS.\n");
5351            break;
5352        }
5353    }
5354    ucnv_close(cnv);
5355}
5356
5357static void TestEBCDICUS4XML()
5358{
5359    UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5360    static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5361    static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5362    static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5363    char target_x[] = {0x00, 0x00, 0x00, 0x00};
5364    UChar *unicodes = unicodes_x;
5365    const UChar *toUnicodeMaps = toUnicodeMaps_x;
5366    char *target = target_x;
5367    const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5368    UErrorCode status = U_ZERO_ERROR;
5369    UConverter *cnv = 0;
5370
5371    cnv = ucnv_open("ebcdic-xml-us", &status);
5372    if (U_FAILURE(status) || cnv == 0) {
5373        log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5374        return;
5375    }
5376    ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5377    if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5378        log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5379            u_errorName(status));
5380        printUSeqErr(unicodes_x, 3);
5381        printUSeqErr(toUnicodeMaps, 3);
5382    }
5383    status = U_ZERO_ERROR;
5384    ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5385    if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5386        log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5387            u_errorName(status));
5388        printSeqErr((const unsigned char*)target_x, 3);
5389        printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5390    }
5391    ucnv_close(cnv);
5392}
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5394
5395#if !UCONFIG_NO_COLLATION
5396
5397static void TestJitterbug981(){
5398    const UChar* rules;
5399    int32_t rules_length, target_cap, bytes_needed, buff_size;
5400    UErrorCode status = U_ZERO_ERROR;
5401    UConverter *utf8cnv;
5402    UCollator* myCollator;
5403    char *buff;
5404    int numNeeded=0;
5405    utf8cnv = ucnv_open ("utf8", &status);
5406    if(U_FAILURE(status)){
5407        log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5408        return;
5409    }
5410    myCollator = ucol_open("zh", &status);
5411    if(U_FAILURE(status)){
5412        log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5413        ucnv_close(utf8cnv);
5414        return;
5415    }
5416
5417    rules = ucol_getRules(myCollator, &rules_length);
5418    buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5419    buff = malloc(buff_size);
5420
5421    target_cap = 0;
5422    do {
5423        ucnv_reset(utf8cnv);
5424        status = U_ZERO_ERROR;
5425        if(target_cap >= buff_size) {
5426            log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5427            break;
5428        }
5429        bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5430            rules, rules_length, &status);
5431        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5432        if(numNeeded!=0 && numNeeded!= bytes_needed){
5433            log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5434            break;
5435        }
5436        numNeeded = bytes_needed;
5437    } while (status == U_BUFFER_OVERFLOW_ERROR);
5438    ucol_close(myCollator);
5439    ucnv_close(utf8cnv);
5440    free(buff);
5441}
5442
5443#endif
5444
5445#if !UCONFIG_NO_LEGACY_CONVERSION
5446static void TestJitterbug1293(){
5447    static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5448    char target[256];
5449    UErrorCode status = U_ZERO_ERROR;
5450    UConverter* conv=NULL;
5451    int32_t target_cap, bytes_needed, numNeeded = 0;
5452    conv = ucnv_open("shift-jis",&status);
5453    if(U_FAILURE(status)){
5454      log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5455      return;
5456    }
5457
5458    do{
5459        target_cap =0;
5460        bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5461        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5462        if(numNeeded!=0 && numNeeded!= bytes_needed){
5463          log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5464        }
5465        numNeeded = bytes_needed;
5466    } while (status == U_BUFFER_OVERFLOW_ERROR);
5467    if(U_FAILURE(status)){
5468      log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5469      return;
5470    }
5471    ucnv_close(conv);
5472}
5473#endif
5474
5475static void TestJB5275_1(){
5476
5477    static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5478                                "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5479                                /* Switch script: */
5480                                "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5481                                "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5482                                "\xEF\x40\x3B\xB3\x0A";
5483    static const UChar expected[] ={
5484            0x003b, 0x0a15, 0x000a, /* Easy characters */
5485            0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5486            0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5487            0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5488            0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5489    };
5490
5491    UErrorCode status = U_ZERO_ERROR;
5492    UConverter* conv = ucnv_open("iscii-gur", &status);
5493    UChar dest[100] = {'\0'};
5494    UChar* target = dest;
5495    UChar* targetLimit = dest+100;
5496    const char* source = data;
5497    const char* sourceLimit = data+strlen(data);
5498    const UChar* exp = expected;
5499
5500    if (U_FAILURE(status)) {
5501        log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5502        return;
5503    }
5504
5505    log_verbose("Testing switching back to default script when new line is encountered.\n");
5506    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5507    if(U_FAILURE(status)){
5508        log_err("conversion failed: %s \n", u_errorName(status));
5509    }
5510    targetLimit = target;
5511    target = dest;
5512    printUSeq(target, targetLimit-target);
5513    while(target<targetLimit){
5514        if(*exp!=*target){
5515            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5516        }
5517        target++;
5518        exp++;
5519    }
5520    ucnv_close(conv);
5521}
5522
5523static void TestJB5275(){
5524    static const char* data =
5525    /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5526    /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5527    /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5528        "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5529        "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5530        "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5531        "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5532        "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5533        "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5534        /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5535    static const UChar expected[] ={
5536        0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5537        0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5538        0x0038, 0x0C95, 0x000A, /* Kannada test */
5539        0x0039, 0x0D15, 0x000A, /* Malayalam test */
5540        0x003A, 0x0A95, 0x000A, /* Gujarati test */
5541        0x003B, 0x0A15, 0x000A, /* Punjabi test */
5542    };
5543
5544    UErrorCode status = U_ZERO_ERROR;
5545    UConverter* conv = ucnv_open("iscii", &status);
5546    UChar dest[100] = {'\0'};
5547    UChar* target = dest;
5548    UChar* targetLimit = dest+100;
5549    const char* source = data;
5550    const char* sourceLimit = data+strlen(data);
5551    const UChar* exp = expected;
5552    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5553    if(U_FAILURE(status)){
5554        log_err("conversion failed: %s \n", u_errorName(status));
5555    }
5556    targetLimit = target;
5557    target = dest;
5558
5559    printUSeq(target, targetLimit-target);
5560
5561    while(target<targetLimit){
5562        if(*exp!=*target){
5563            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5564        }
5565        target++;
5566        exp++;
5567    }
5568    ucnv_close(conv);
5569}
5570
5571static void
5572TestIsFixedWidth() {
5573    UErrorCode status = U_ZERO_ERROR;
5574    UConverter *cnv = NULL;
5575    int32_t i;
5576
5577    const char *fixedWidth[] = {
5578            "US-ASCII",
5579            "UTF32",
5580            "ibm-5478_P100-1995"
5581    };
5582
5583    const char *notFixedWidth[] = {
5584            "GB18030",
5585            "UTF8",
5586            "windows-949-2000",
5587            "UTF16"
5588    };
5589
5590    for (i = 0; i < LENGTHOF(fixedWidth); i++) {
5591        cnv = ucnv_open(fixedWidth[i], &status);
5592        if (cnv == NULL || U_FAILURE(status)) {
5593            log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5594            continue;
5595        }
5596
5597        if (!ucnv_isFixedWidth(cnv, &status)) {
5598            log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5599        }
5600        ucnv_close(cnv);
5601    }
5602
5603    for (i = 0; i < LENGTHOF(notFixedWidth); i++) {
5604        cnv = ucnv_open(notFixedWidth[i], &status);
5605        if (cnv == NULL || U_FAILURE(status)) {
5606            log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5607            continue;
5608        }
5609
5610        if (ucnv_isFixedWidth(cnv, &status)) {
5611            log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5612        }
5613        ucnv_close(cnv);
5614    }
5615}
5616