1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File CCONVTST.C
9*
10* Modification History:
11*        Name                     Description
12*    Steven R. Loomis     7/8/1999      Adding input buffer test
13********************************************************************************
14*/
15#include <stdio.h>
16#include "cstring.h"
17#include "unicode/uloc.h"
18#include "unicode/ucnv.h"
19#include "unicode/ucnv_err.h"
20#include "unicode/ucnv_cb.h"
21#include "cintltst.h"
22#include "unicode/utypes.h"
23#include "unicode/ustring.h"
24#include "unicode/ucol.h"
25#include "cmemory.h"
26#include "nucnvtst.h"
27
/* Element count of a real array (not a pointer): total size divided by the size of one element. */
#define LENGTHOF(array) (sizeof(array)/sizeof(*(array)))
29
30static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
31static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
32#if !UCONFIG_NO_COLLATION
33static void TestJitterbug981(void);
34#endif
35static void TestJitterbug1293(void);
36static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
37static void TestConverterTypesAndStarters(void);
38static void TestAmbiguous(void);
39static void TestSignatureDetection(void);
40static void TestUTF7(void);
41static void TestIMAP(void);
42static void TestUTF8(void);
43static void TestCESU8(void);
44static void TestUTF16(void);
45static void TestUTF16BE(void);
46static void TestUTF16LE(void);
47static void TestUTF32(void);
48static void TestUTF32BE(void);
49static void TestUTF32LE(void);
50static void TestLATIN1(void);
51
52#if !UCONFIG_NO_LEGACY_CONVERSION
53static void TestSBCS(void);
54static void TestDBCS(void);
55static void TestMBCS(void);
56#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
57static void TestICCRunout(void);
58#endif
59
60#ifdef U_ENABLE_GENERIC_ISO_2022
61static void TestISO_2022(void);
62#endif
63
64static void TestISO_2022_JP(void);
65static void TestISO_2022_JP_1(void);
66static void TestISO_2022_JP_2(void);
67static void TestISO_2022_KR(void);
68static void TestISO_2022_KR_1(void);
69static void TestISO_2022_CN(void);
70#if 0
71   /*
72    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
73    */
74static void TestISO_2022_CN_EXT(void);
75#endif
76static void TestJIS(void);
77static void TestHZ(void);
78#endif
79
80static void TestSCSU(void);
81
82#if !UCONFIG_NO_LEGACY_CONVERSION
83static void TestEBCDIC_STATEFUL(void);
84static void TestGB18030(void);
85static void TestLMBCS(void);
86static void TestJitterbug255(void);
87static void TestEBCDICUS4XML(void);
88#if 0
89   /*
90    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
91    */
92static void TestJitterbug915(void);
93#endif
94static void TestISCII(void);
95
96static void TestCoverageMBCS(void);
97static void TestJitterbug2346(void);
98static void TestJitterbug2411(void);
99static void TestJB5275(void);
100static void TestJB5275_1(void);
101static void TestJitterbug6175(void);
102
103static void TestIsFixedWidth(void);
104#endif
105
106static void TestInBufSizes(void);
107
108static void TestRoundTrippingAllUTF(void);
109static void TestConv(const uint16_t in[],
110                     int len,
111                     const char* conv,
112                     const char* lang,
113                     char byteArr[],
114                     int byteArrLen);
115
116/* open a converter, using test data if it begins with '@' */
117static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
118
119
/* Capacity (in elements) of the scratch output and offset buffers used by the conversion helpers. */
#define NEW_MAX_BUFFER 999

/* Chunk sizes used when feeding input / offering output space; set per run by TestNewConvertWithBufferSizes(). */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Label for log messages, filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/*
 * Smaller of x and y.
 * Every argument use is parenthesized so that expressions containing
 * low-precedence operators (e.g. "a & b") are compared as a whole.
 * The arguments are evaluated more than once: do not pass expressions
 * with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
127
128static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
129{
130  if(cnv && cnv[0] == '@') {
131    return ucnv_openPackage(loadTestData(err), cnv+1, err);
132  } else {
133    return ucnv_open(cnv, err);
134  }
135}
136
/* Writes the first len bytes of a to the verbose log as "{0x.. 0x.. }" plus a newline. */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
145
146static void printUSeq(const UChar* a, int len)
147{
148    int i=0;
149    log_verbose("{U+");
150    while (i<len) log_verbose("0x%04x ", a[i++]);
151    log_verbose("}\n");
152}
153
/* Writes the first len bytes of a to stderr as "{0x.. 0x.. }" plus a newline. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
162
163static void printUSeqErr(const UChar* a, int len)
164{
165    int i=0;
166    fprintf(stderr, "{U+");
167    while (i<len)
168        fprintf(stderr, "0x%04x ", a[i++]);
169    fprintf(stderr,"}\n");
170}
171
172static void
173TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
174{
175     const char* s0;
176     const char* s=(char*)source;
177     const int32_t *r=results;
178     UErrorCode errorCode=U_ZERO_ERROR;
179     UChar32 c;
180
181     while(s<limit) {
182        s0=s;
183        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
184        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
185            break; /* no more significant input */
186        } else if(U_FAILURE(errorCode)) {
187            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
188            break;
189        } else if(
190            /* test the expected number of input bytes only if >=0 */
191            (*r>=0 && (int32_t)(s-s0)!=*r) ||
192            c!=*(r+1)
193        ) {
194            log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
195                message, c, (s-s0), *(r+1), *r);
196            break;
197        }
198        r+=2;
199    }
200}
201
202static void
203TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
204{
205     const char* s=(char*)source;
206     UErrorCode errorCode=U_ZERO_ERROR;
207     uint32_t c;
208     c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
209     if(errorCode != expected){
210        log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
211     }
212     if(c != 0xFFFD && c != 0xffff){
213        log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
214     }
215
216}
217
218static void TestInBufSizes(void)
219{
220  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
221#if 1
222  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
223  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
224  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
225  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
226  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
227  TestNewConvertWithBufferSizes(1,1);
228  TestNewConvertWithBufferSizes(2,3);
229  TestNewConvertWithBufferSizes(3,2);
230#endif
231}
232
233static void TestOutBufSizes(void)
234{
235#if 1
236  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
237  TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
238  TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
239  TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
240  TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
241  TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
242
243#endif
244}
245
246
247void addTestNewConvert(TestNode** root)
248{
249#if !UCONFIG_NO_FILE_IO
250   addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
251   addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
252#endif
253   addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
254   addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
255   addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
256   addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
257   addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
258   addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
259
260   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
261   addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
262   addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
263   addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
264   addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
265   addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
266   addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
267   addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
268
269#if !UCONFIG_NO_LEGACY_CONVERSION
270   addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
271#endif
272
273   addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
274
275#if !UCONFIG_NO_LEGACY_CONVERSION
276   addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
277#if !UCONFIG_NO_FILE_IO
278   addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
279   addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
280#endif
281   addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
282
283#ifdef U_ENABLE_GENERIC_ISO_2022
284   addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
285#endif
286   /* BEGIN android-changed
287      To save space, Android does not build full ISO2022 CJK tables.
288      We turn off the tests here.
289   addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
290   END android-changed */
291   addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
292   /* BEGIN android-changed
293   addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
294   addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
295   addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
296   addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
297   addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
298  END android-changed */
299   /*
300    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
301   addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
302   addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
303    */
304   addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
305#endif
306
307   addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
308
309#if !UCONFIG_NO_LEGACY_CONVERSION
310   addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
311   addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
312   addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
313   addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
314   addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
315   addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
316   addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
317#if !UCONFIG_NO_COLLATION
318   /* BEGIN android-removed
319      To save space, Android does not include the collation tailoring rules.
320      Skip the related tests.
321   addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
322      END android-removed */
323#endif
324
325   addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
326#endif
327
328
329#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
330   addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
331#endif
332
333   addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
334
335#if !UCONFIG_NO_LEGACY_CONVERSION
336   addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
337   addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
338   /* BEGIN android-removed
339      To save space, Android does not build full ISO2022 CJK tables.
340      We turn off the tests here.
341   addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
342      END android-removed */
343
344   addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
345#endif
346}
347
348
349/* Note that this test already makes use of statics, so it's not really
350   multithread safe.
351   This convenience function lets us make the error messages actually useful.
352*/
353
354static void setNuConvTestName(const char *codepage, const char *direction)
355{
356    sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
357        codepage,
358        direction,
359        (int)gInBufferSize,
360        (int)gOutBufferSize);
361}
362
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
    /* the conversion produced the expected result */
    TC_OK = 0,
    /* the result did not match; an error message was printed */
    TC_MISMATCH,
    /* the test itself failed; the error was already printed, so callers should not print another */
    TC_FAIL
} ETestConvertResult;
369
370/* Note: This function uses global variables and it will not do offset
371checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
372static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
373                const char *codepage, const int32_t *expectOffsets , UBool useFallback)
374{
375    UErrorCode status = U_ZERO_ERROR;
376    UConverter *conv = 0;
377    char    junkout[NEW_MAX_BUFFER]; /* FIX */
378    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
379    char *p;
380    const UChar *src;
381    char *end;
382    char *targ;
383    int32_t *offs;
384    int i;
385    int32_t   realBufferSize;
386    char *realBufferEnd;
387    const UChar *realSourceEnd;
388    const UChar *sourceLimit;
389    UBool checkOffsets = TRUE;
390    UBool doFlush;
391
392    for(i=0;i<NEW_MAX_BUFFER;i++)
393        junkout[i] = (char)0xF0;
394    for(i=0;i<NEW_MAX_BUFFER;i++)
395        junokout[i] = 0xFF;
396
397    setNuConvTestName(codepage, "FROM");
398
399    log_verbose("\n=========  %s\n", gNuConvTestName);
400
401    conv = my_ucnv_open(codepage, &status);
402
403    if(U_FAILURE(status))
404    {
405        log_data_err("Couldn't open converter %s\n",codepage);
406        return TC_FAIL;
407    }
408    if(useFallback){
409        ucnv_setFallback(conv,useFallback);
410    }
411
412    log_verbose("Converter opened..\n");
413
414    src = source;
415    targ = junkout;
416    offs = junokout;
417
418    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
419    realBufferEnd = junkout + realBufferSize;
420    realSourceEnd = source + sourceLen;
421
422    if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
423        checkOffsets = FALSE;
424
425    do
426    {
427      end = nct_min(targ + gOutBufferSize, realBufferEnd);
428      sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
429
430      doFlush = (UBool)(sourceLimit == realSourceEnd);
431
432      if(targ == realBufferEnd) {
433        log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
434        return TC_FAIL;
435      }
436      log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
437
438
439      status = U_ZERO_ERROR;
440
441      ucnv_fromUnicode (conv,
442                        &targ,
443                        end,
444                        &src,
445                        sourceLimit,
446                        checkOffsets ? offs : NULL,
447                        doFlush, /* flush if we're at the end of the input data */
448                        &status);
449    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
450
451    if(U_FAILURE(status)) {
452      log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
453      return TC_FAIL;
454    }
455
456    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
457                sourceLen, targ-junkout);
458
459    if(getTestOption(VERBOSITY_OPTION))
460    {
461      char junk[9999];
462      char offset_str[9999];
463      char *ptr;
464
465      junk[0] = 0;
466      offset_str[0] = 0;
467      for(ptr = junkout;ptr<targ;ptr++) {
468        sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
469        sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
470      }
471
472      log_verbose(junk);
473      printSeq((const uint8_t *)expect, expectLen);
474      if ( checkOffsets ) {
475        log_verbose("\nOffsets:");
476        log_verbose(offset_str);
477      }
478      log_verbose("\n");
479    }
480    ucnv_close(conv);
481
482    if(expectLen != targ-junkout) {
483      log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
484      log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
485      fprintf(stderr, "Got:\n");
486      printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
487      fprintf(stderr, "Expected:\n");
488      printSeqErr((const unsigned char*)expect, expectLen);
489      return TC_MISMATCH;
490    }
491
492    if (checkOffsets && (expectOffsets != 0) ) {
493      log_verbose("comparing %d offsets..\n", targ-junkout);
494      if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
495        log_err("did not get the expected offsets. %s\n", gNuConvTestName);
496        printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
497        log_err("\n");
498        log_err("Got  :     ");
499        for(p=junkout;p<targ;p++) {
500          log_err("%d,", junokout[p-junkout]);
501        }
502        log_err("\n");
503        log_err("Expected:  ");
504        for(i=0; i<(targ-junkout); i++) {
505          log_err("%d,", expectOffsets[i]);
506        }
507        log_err("\n");
508      }
509    }
510
511    log_verbose("comparing..\n");
512    if(!memcmp(junkout, expect, expectLen)) {
513      log_verbose("Matches!\n");
514      return TC_OK;
515    } else {
516      log_err("String does not match u->%s\n", gNuConvTestName);
517      printUSeqErr(source, sourceLen);
518      fprintf(stderr, "Got:\n");
519      printSeqErr((const unsigned char *)junkout, expectLen);
520      fprintf(stderr, "Expected:\n");
521      printSeqErr((const unsigned char *)expect, expectLen);
522
523      return TC_MISMATCH;
524    }
525}
526
527/* Note: This function uses global variables and it will not do offset
528checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
529static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
530                                          const char *codepage, const int32_t *expectOffsets, UBool useFallback)
531{
532    UErrorCode status = U_ZERO_ERROR;
533    UConverter *conv = 0;
534    UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
535    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
536    const char *src;
537    const char *realSourceEnd;
538    const char *srcLimit;
539    UChar *p;
540    UChar *targ;
541    UChar *end;
542    int32_t *offs;
543    int i;
544    UBool   checkOffsets = TRUE;
545
546    int32_t   realBufferSize;
547    UChar *realBufferEnd;
548
549
550    for(i=0;i<NEW_MAX_BUFFER;i++)
551        junkout[i] = 0xFFFE;
552
553    for(i=0;i<NEW_MAX_BUFFER;i++)
554        junokout[i] = -1;
555
556    setNuConvTestName(codepage, "TO");
557
558    log_verbose("\n=========  %s\n", gNuConvTestName);
559
560    conv = my_ucnv_open(codepage, &status);
561
562    if(U_FAILURE(status))
563    {
564        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
565        return TC_FAIL;
566    }
567    if(useFallback){
568        ucnv_setFallback(conv,useFallback);
569    }
570    log_verbose("Converter opened..\n");
571
572    src = (const char *)source;
573    targ = junkout;
574    offs = junokout;
575
576    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
577    realBufferEnd = junkout + realBufferSize;
578    realSourceEnd = src + sourcelen;
579
580    if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
581        checkOffsets = FALSE;
582
583    do
584    {
585        end = nct_min( targ + gOutBufferSize, realBufferEnd);
586        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
587
588        if(targ == realBufferEnd)
589        {
590            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
591            return TC_FAIL;
592        }
593        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
594
595        /* oldTarg = targ; */
596
597        status = U_ZERO_ERROR;
598
599        ucnv_toUnicode (conv,
600                &targ,
601                end,
602                &src,
603                srcLimit,
604                checkOffsets ? offs : NULL,
605                (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
606                &status);
607
608        /*        offs += (targ-oldTarg); */
609
610      } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
611
612    if(U_FAILURE(status))
613    {
614        log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
615        return TC_FAIL;
616    }
617
618    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
619        sourcelen, targ-junkout);
620    if(getTestOption(VERBOSITY_OPTION))
621    {
622        char junk[9999];
623        char offset_str[9999];
624        UChar *ptr;
625
626        junk[0] = 0;
627        offset_str[0] = 0;
628
629        for(ptr = junkout;ptr<targ;ptr++)
630        {
631            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
632            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
633        }
634
635        log_verbose(junk);
636        printUSeq(expect, expectlen);
637        if ( checkOffsets )
638          {
639            log_verbose("\nOffsets:");
640            log_verbose(offset_str);
641          }
642        log_verbose("\n");
643    }
644    ucnv_close(conv);
645
646    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
647
648    if (checkOffsets && (expectOffsets != 0))
649    {
650        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
651            log_err("did not get the expected offsets. %s\n",gNuConvTestName);
652            log_err("Got:      ");
653            for(p=junkout;p<targ;p++) {
654                log_err("%d,", junokout[p-junkout]);
655            }
656            log_err("\n");
657            log_err("Expected: ");
658            for(i=0; i<(targ-junkout); i++) {
659                log_err("%d,", expectOffsets[i]);
660            }
661            log_err("\n");
662            log_err("output:   ");
663            for(i=0; i<(targ-junkout); i++) {
664                log_err("%X,", junkout[i]);
665            }
666            log_err("\n");
667            log_err("input:    ");
668            for(i=0; i<(src-(const char *)source); i++) {
669                log_err("%X,", (unsigned char)source[i]);
670            }
671            log_err("\n");
672        }
673    }
674
675    if(!memcmp(junkout, expect, expectlen*2))
676    {
677        log_verbose("Matches!\n");
678        return TC_OK;
679    }
680    else
681    {
682        log_err("String does not match. %s\n", gNuConvTestName);
683        log_verbose("String does not match. %s\n", gNuConvTestName);
684        printf("\nGot:");
685        printUSeqErr(junkout, expectlen);
686        printf("\nExpected:");
687        printUSeqErr(expect, expectlen);
688        return TC_MISMATCH;
689    }
690}
691
692
693static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
694{
695/** test chars #1 */
696    /*  1 2 3  1Han 2Han 3Han .  */
697    static const UChar   sampleText[] =
698     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
699    static const UChar sampleTextRoundTripUnmappable[] =
700    { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
701
702
703    static const uint8_t expectedUTF8[] =
704     { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
705    static const int32_t toUTF8Offs[] =
706     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
707    static const int32_t fmUTF8Offs[] =
708     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
709
710#ifdef U_ENABLE_GENERIC_ISO_2022
711    /* Same as UTF8, but with ^[%B preceeding */
712    static const const uint8_t expectedISO2022[] =
713     { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
714    static const int32_t toISO2022Offs[]     =
715     { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
716       0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
717    static const int32_t fmISO2022Offs[] =
718     { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
719#endif
720
721    /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
722    static const uint8_t expectedIBM930[] =
723     { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
724    static const int32_t toIBM930Offs[] =
725     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
726    static const int32_t fmIBM930Offs[] =
727     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
728
729    /* 1 2 3 0 h1 h2 h3 . MBCS*/
730    static const uint8_t expectedIBM943[] =
731     {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
732    static const int32_t toIBM943Offs    [] =
733     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
734    static const int32_t fmIBM943Offs[] =
735     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
736
737    /* 1 2 3 0 h1 h2 h3 . DBCS*/
738    static const uint8_t expectedIBM9027[] =
739     {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
740    static const int32_t toIBM9027Offs    [] =
741     {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
742
743     /* 1 2 3 0 <?> <?> <?> . SBCS*/
744    static const uint8_t expectedIBM920[] =
745     {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
746    static const int32_t toIBM920Offs    [] =
747     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
748
749    /* 1 2 3 0 <?> <?> <?> . SBCS*/
750    static const uint8_t expectedISO88593[] =
751     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
752    static const int32_t toISO88593Offs[]     =
753     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
754
755    /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
756    static const uint8_t expectedLATIN1[] =
757     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
758    static const int32_t toLATIN1Offs[]     =
759     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
760
761
762    /*  etc */
763    static const uint8_t expectedUTF16BE[] =
764     { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
765    static const int32_t toUTF16BEOffs[]=
766     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
767    static const int32_t fmUTF16BEOffs[] =
768     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
769
770    static const uint8_t expectedUTF16LE[] =
771     { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
772    static const int32_t toUTF16LEOffs[]=
773     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
774    static const int32_t fmUTF16LEOffs[] =
775     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
776
777    static const uint8_t expectedUTF32BE[] =
778     { 0x00, 0x00, 0x00, 0x31,
779       0x00, 0x00, 0x00, 0x32,
780       0x00, 0x00, 0x00, 0x33,
781       0x00, 0x00, 0x00, 0x00,
782       0x00, 0x00, 0x4e, 0x00,
783       0x00, 0x00, 0x4e, 0x8c,
784       0x00, 0x00, 0x4e, 0x09,
785       0x00, 0x00, 0x00, 0x2e,
786       0x00, 0x02, 0x00, 0x21 };
787    static const int32_t toUTF32BEOffs[]=
788     { 0x00, 0x00, 0x00, 0x00,
789       0x01, 0x01, 0x01, 0x01,
790       0x02, 0x02, 0x02, 0x02,
791       0x03, 0x03, 0x03, 0x03,
792       0x04, 0x04, 0x04, 0x04,
793       0x05, 0x05, 0x05, 0x05,
794       0x06, 0x06, 0x06, 0x06,
795       0x07, 0x07, 0x07, 0x07,
796       0x08, 0x08, 0x08, 0x08,
797       0x08, 0x08, 0x08, 0x08 };
798    static const int32_t fmUTF32BEOffs[] =
799     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
800
801    static const uint8_t expectedUTF32LE[] =
802     { 0x31, 0x00, 0x00, 0x00,
803       0x32, 0x00, 0x00, 0x00,
804       0x33, 0x00, 0x00, 0x00,
805       0x00, 0x00, 0x00, 0x00,
806       0x00, 0x4e, 0x00, 0x00,
807       0x8c, 0x4e, 0x00, 0x00,
808       0x09, 0x4e, 0x00, 0x00,
809       0x2e, 0x00, 0x00, 0x00,
810       0x21, 0x00, 0x02, 0x00 };
811    static const int32_t toUTF32LEOffs[]=
812     { 0x00, 0x00, 0x00, 0x00,
813       0x01, 0x01, 0x01, 0x01,
814       0x02, 0x02, 0x02, 0x02,
815       0x03, 0x03, 0x03, 0x03,
816       0x04, 0x04, 0x04, 0x04,
817       0x05, 0x05, 0x05, 0x05,
818       0x06, 0x06, 0x06, 0x06,
819       0x07, 0x07, 0x07, 0x07,
820       0x08, 0x08, 0x08, 0x08,
821       0x08, 0x08, 0x08, 0x08 };
822    static const int32_t fmUTF32LEOffs[] =
823     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
824
825
826
827
828/** Test chars #2 **/
829
830    /* Sahha [health],  slashed h's */
831    static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
832    static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
833
834    /* LMBCS */
835    static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
836    static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
837    static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
838    static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
839    /*********************************** START OF CODE finally *************/
840
841    gInBufferSize = insize;
842    gOutBufferSize = outsize;
843
844    log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
845
846
847    /*UTF-8*/
848    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
849        expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
850
851    log_verbose("Test surrogate behaviour for UTF8\n");
852    {
853        static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
854        static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
855                           0xf0, 0x90, 0x90, 0x81,
856                           0xef, 0xbf, 0xbd
857        };
858        static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
859        testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
860                         expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
861
862
863    }
864
865#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
866    /*ISO-2022*/
867    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
868        expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
869#endif
870
871    /*UTF16 LE*/
872    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
873        expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
874    /*UTF16 BE*/
875    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
876        expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
877    /*UTF32 LE*/
878    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
879        expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
880    /*UTF32 BE*/
881    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
882        expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
883
884    /*LATIN_1*/
885    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
886        expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
887
888#if !UCONFIG_NO_LEGACY_CONVERSION
889    /*EBCDIC_STATEFUL*/
890    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
891        expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
892
893    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
894        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
895
896    /*MBCS*/
897
898    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
899        expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
900    /*DBCS*/
901    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
902        expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
903    /*SBCS*/
904    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
905        expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
906    /*SBCS*/
907    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
908        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
909#endif
910
911
912/****/
913
914    /*UTF-8*/
915    testConvertToU(expectedUTF8, sizeof(expectedUTF8),
916        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
917#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
918    /*ISO-2022*/
919    testConvertToU(expectedISO2022, sizeof(expectedISO2022),
920        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
921#endif
922
923    /*UTF16 LE*/
924    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
925        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
926    /*UTF16 BE*/
927    testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
928        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
929    /*UTF32 LE*/
930    testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
931        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
932    /*UTF32 BE*/
933    testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
934        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
935
936#if !UCONFIG_NO_LEGACY_CONVERSION
937    /*EBCDIC_STATEFUL*/
938    testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
939            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE);
940    /*MBCS*/
941    testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
942            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE);
943#endif
944
945    /* Try it again to make sure it still works */
946    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
947        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
948
949#if !UCONFIG_NO_LEGACY_CONVERSION
950    testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
951        malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
952
953    testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
954        expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
955
956    /*LMBCS*/
957    testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
958        expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
959    testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
960        LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
961#endif
962
963    /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
964    {
965        /* encode directly set D and set O */
966        static const uint8_t utf7[] = {
967            /*
968                Hi Mom -+Jjo--!
969                A+ImIDkQ.
970                +-
971                +ZeVnLIqe-
972            */
973            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
974            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
975            0x2b, 0x2d,
976            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
977        };
978        static const UChar unicode[] = {
979            /*
980                Hi Mom -<WHITE SMILING FACE>-!
981                A<NOT IDENTICAL TO><ALPHA>.
982                +
983                [Japanese word "nihongo"]
984            */
985            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
986            0x41, 0x2262, 0x0391, 0x2e,
987            0x2b,
988            0x65e5, 0x672c, 0x8a9e
989        };
990        static const int32_t toUnicodeOffsets[] = {
991            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
992            15, 17, 19, 23,
993            24,
994            27, 29, 32
995        };
996        static const int32_t fromUnicodeOffsets[] = {
997            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
998            11, 12, 12, 12, 13, 13, 13, 13, 14,
999            15, 15,
1000            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1001        };
1002
1003        /* same but escaping set O (the exclamation mark) */
1004        static const uint8_t utf7Restricted[] = {
1005            /*
1006                Hi Mom -+Jjo--+ACE-
1007                A+ImIDkQ.
1008                +-
1009                +ZeVnLIqe-
1010            */
1011            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1012            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1013            0x2b, 0x2d,
1014            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1015        };
1016        static const int32_t toUnicodeOffsetsR[] = {
1017            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1018            19, 21, 23, 27,
1019            28,
1020            31, 33, 36
1021        };
1022        static const int32_t fromUnicodeOffsetsR[] = {
1023            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1024            11, 12, 12, 12, 13, 13, 13, 13, 14,
1025            15, 15,
1026            16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1027        };
1028
1029        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1030
1031        testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
1032
1033        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1034
1035        testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1036    }
1037
1038    /*
1039     * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1040     * modified according to RFC 2060,
1041     * and supplemented with the one example in RFC 2060 itself.
1042     */
1043    {
1044        static const uint8_t imap[] = {
1045            /*  Hi Mom -&Jjo--!
1046                A&ImIDkQ-.
1047                &-
1048                &ZeVnLIqe-
1049                \
1050                ~peter
1051                /mail
1052                /&ZeVnLIqe-
1053                /&U,BTFw-
1054            */
1055            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1056            0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1057            0x26, 0x2d,
1058            0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1059            0x5c,
1060            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1061            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1062            0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1063            0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1064        };
1065        static const UChar unicode[] = {
1066            /*  Hi Mom -<WHITE SMILING FACE>-!
1067                A<NOT IDENTICAL TO><ALPHA>.
1068                &
1069                [Japanese word "nihongo"]
1070                \
1071                ~peter
1072                /mail
1073                /<65e5, 672c, 8a9e>
1074                /<53f0, 5317>
1075            */
1076            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1077            0x41, 0x2262, 0x0391, 0x2e,
1078            0x26,
1079            0x65e5, 0x672c, 0x8a9e,
1080            0x5c,
1081            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1082            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1083            0x2f, 0x65e5, 0x672c, 0x8a9e,
1084            0x2f, 0x53f0, 0x5317
1085        };
1086        static const int32_t toUnicodeOffsets[] = {
1087            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1088            15, 17, 19, 24,
1089            25,
1090            28, 30, 33,
1091            37,
1092            38, 39, 40, 41, 42, 43,
1093            44, 45, 46, 47, 48,
1094            49, 51, 53, 56,
1095            60, 62, 64
1096        };
1097        static const int32_t fromUnicodeOffsets[] = {
1098            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1099            11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1100            15, 15,
1101            16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1102            19,
1103            20, 21, 22, 23, 24, 25,
1104            26, 27, 28, 29, 30,
1105            31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1106            35, 36, 36, 36, 37, 37, 37, 37, 37
1107        };
1108
1109        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1110
1111        testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1112    }
1113
1114    /* Test UTF-8 bad data handling*/
1115    {
1116        static const uint8_t utf8[]={
1117            0x61,
1118            0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1119            0x00,
1120            0x62,
1121            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1122            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1123            0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1124            0xdf, 0xbf,                     /* 7ff */
1125            0xbf,                           /* truncated tail */
1126            0xf4, 0x90, 0x80, 0x80,         /* 11FFFF */
1127            0x02
1128        };
1129
1130        static const uint16_t utf8Expected[]={
1131            0x0061,
1132            0xfffd,
1133            0x0000,
1134            0x0062,
1135            0xfffd,
1136            0xfffd,
1137            0xdbff, 0xdfff,
1138            0x07ff,
1139            0xfffd,
1140            0xfffd,
1141            0x0002
1142        };
1143
1144        static const int32_t utf8Offsets[]={
1145            0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1146        };
1147        testConvertToU(utf8, sizeof(utf8),
1148                       utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1149
1150    }
1151
1152    /* Test UTF-32BE bad data handling*/
1153    {
1154        static const uint8_t utf32[]={
1155            0x00, 0x00, 0x00, 0x61,
1156            0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1157            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1158            0x00, 0x00, 0x00, 0x62,
1159            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1160            0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1161            0x00, 0x00, 0x01, 0x62,
1162            0x00, 0x00, 0x02, 0x62
1163        };
1164        static const uint16_t utf32Expected[]={
1165            0x0061,
1166            0xfffd,         /* 0x110000 out of range */
1167            0xDBFF,         /* 0x10FFFF in range */
1168            0xDFFF,
1169            0x0062,
1170            0xfffd,         /* 0xffffffff out of range */
1171            0xfffd,         /* 0x7fffffff out of range */
1172            0x0162,
1173            0x0262
1174        };
1175        static const int32_t utf32Offsets[]={
1176            0, 4, 8, 8, 12, 16, 20, 24, 28
1177        };
1178        static const uint8_t utf32ExpectedBack[]={
1179            0x00, 0x00, 0x00, 0x61,
1180            0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1181            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1182            0x00, 0x00, 0x00, 0x62,
1183            0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1184            0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1185            0x00, 0x00, 0x01, 0x62,
1186            0x00, 0x00, 0x02, 0x62
1187        };
1188        static const int32_t utf32OffsetsBack[]={
1189            0,0,0,0,
1190            1,1,1,1,
1191            2,2,2,2,
1192            4,4,4,4,
1193            5,5,5,5,
1194            6,6,6,6,
1195            7,7,7,7,
1196            8,8,8,8
1197        };
1198
1199        testConvertToU(utf32, sizeof(utf32),
1200                       utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1201        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1202            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1203    }
1204
1205    /* Test UTF-32LE bad data handling*/
1206    {
1207        static const uint8_t utf32[]={
1208            0x61, 0x00, 0x00, 0x00,
1209            0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1210            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1211            0x62, 0x00, 0x00, 0x00,
1212            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1213            0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1214            0x62, 0x01, 0x00, 0x00,
1215            0x62, 0x02, 0x00, 0x00,
1216        };
1217
1218        static const uint16_t utf32Expected[]={
1219            0x0061,
1220            0xfffd,         /* 0x110000 out of range */
1221            0xDBFF,         /* 0x10FFFF in range */
1222            0xDFFF,
1223            0x0062,
1224            0xfffd,         /* 0xffffffff out of range */
1225            0xfffd,         /* 0x7fffffff out of range */
1226            0x0162,
1227            0x0262
1228        };
1229        static const int32_t utf32Offsets[]={
1230            0, 4, 8, 8, 12, 16, 20, 24, 28
1231        };
1232        static const uint8_t utf32ExpectedBack[]={
1233            0x61, 0x00, 0x00, 0x00,
1234            0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1235            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1236            0x62, 0x00, 0x00, 0x00,
1237            0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1238            0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1239            0x62, 0x01, 0x00, 0x00,
1240            0x62, 0x02, 0x00, 0x00
1241        };
1242        static const int32_t utf32OffsetsBack[]={
1243            0,0,0,0,
1244            1,1,1,1,
1245            2,2,2,2,
1246            4,4,4,4,
1247            5,5,5,5,
1248            6,6,6,6,
1249            7,7,7,7,
1250            8,8,8,8
1251        };
1252        testConvertToU(utf32, sizeof(utf32),
1253            utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1254        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1255            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1256    }
1257}
1258
1259static void TestCoverageMBCS(){
1260#if 0
1261    UErrorCode status = U_ZERO_ERROR;
1262    const char *directory = loadTestData(&status);
1263    char* tdpath = NULL;
1264    char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1265    int len = strlen(directory);
1266    char* index=NULL;
1267
1268    tdpath = (char*) malloc(sizeof(char) * (len * 2));
1269    uprv_strcpy(saveDirectory,u_getDataDirectory());
1270    log_verbose("Retrieved data directory %s \n",saveDirectory);
1271    uprv_strcpy(tdpath,directory);
1272    index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1273
1274    if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1275            *(index+1)=0;
1276    }
1277    u_setDataDirectory(tdpath);
1278    log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1279#endif
1280
1281    /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1282      which is test file for MBCS conversion with single-byte codepage data.*/
1283    {
1284
1285        /* MBCS with single byte codepage data test1.ucm*/
1286        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1287        const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1288        int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1289
1290        /*from Unicode*/
1291        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1292            expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1293    }
1294
1295    /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1296      which is test file for MBCS conversion with three-byte codepage data.*/
1297    {
1298
1299        /* MBCS with three byte codepage data test3.ucm*/
1300        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1301        const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1302        int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1303
1304        const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1305        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1306        int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1307
1308        /*from Unicode*/
1309        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1310            expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1311
1312        /*to Unicode*/
1313        testConvertToU(test3input, sizeof(test3input),
1314            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1315
1316    }
1317
1318    /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1319      which is test file for MBCS conversion with four-byte codepage data.*/
1320    {
1321
1322        /* MBCS with three byte codepage data test4.ucm*/
1323        static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1324        static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1325        static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1326
1327        static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1328        static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1329        static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1330
1331        /*from Unicode*/
1332        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1333            expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1334
1335        /*to Unicode*/
1336        testConvertToU(test4input, sizeof(test4input),
1337            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1338
1339    }
1340#if 0
1341    free(tdpath);
1342    /* restore the original data directory */
1343    log_verbose("Setting the data directory to %s \n", saveDirectory);
1344    u_setDataDirectory(saveDirectory);
1345    free(saveDirectory);
1346#endif
1347
1348}
1349
1350static void TestConverterType(const char *convName, UConverterType convType) {
1351    UConverter* myConverter;
1352    UErrorCode err = U_ZERO_ERROR;
1353
1354    myConverter = my_ucnv_open(convName, &err);
1355
1356    if (U_FAILURE(err)) {
1357        log_data_err("Failed to create an %s converter\n", convName);
1358        return;
1359    }
1360    else
1361    {
1362        if (ucnv_getType(myConverter)!=convType) {
1363            log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1364                convName, convType);
1365        }
1366        else {
1367            log_verbose("ucnv_getType %s ok\n", convName);
1368        }
1369    }
1370    ucnv_close(myConverter);
1371}
1372
1373static void TestConverterTypesAndStarters()
1374{
1375#if !UCONFIG_NO_LEGACY_CONVERSION
1376    UConverter* myConverter;
1377    UErrorCode err = U_ZERO_ERROR;
1378    UBool mystarters[256];
1379
1380/*    const UBool expectedKSCstarters[256] = {
1381        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1386        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1387        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1388        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1389        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1390        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1391        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1392        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1393        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1394        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1395        FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1397        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1398        TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1399        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1400        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1401        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1402        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1403        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1404        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1405        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1406        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1407
1408
1409    log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1410
1411    myConverter = ucnv_open("ksc", &err);
1412    if (U_FAILURE(err)) {
1413      log_data_err("Failed to create an ibm-ksc converter\n");
1414      return;
1415    }
1416    else
1417    {
1418        if (ucnv_getType(myConverter)!=UCNV_MBCS)
1419            log_err("ucnv_getType Failed for ibm-949\n");
1420        else
1421            log_verbose("ucnv_getType ibm-949 ok\n");
1422
1423        if(myConverter!=NULL)
1424            ucnv_getStarters(myConverter, mystarters, &err);
1425
1426        /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1427          log_err("Failed ucnv_getStarters for ksc\n");
1428          else
1429          log_verbose("ucnv_getStarters ok\n");*/
1430
1431    }
1432    ucnv_close(myConverter);
1433
1434    TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1435    TestConverterType("ibm-878", UCNV_SBCS);
1436#endif
1437
1438    TestConverterType("iso-8859-1", UCNV_LATIN_1);
1439
1440    TestConverterType("ibm-1208", UCNV_UTF8);
1441
1442    TestConverterType("utf-8", UCNV_UTF8);
1443    TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1444    TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1445    TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1446    TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1447
1448#if !UCONFIG_NO_LEGACY_CONVERSION
1449
1450#if defined(U_ENABLE_GENERIC_ISO_2022)
1451    TestConverterType("iso-2022", UCNV_ISO_2022);
1452#endif
1453
1454    TestConverterType("hz", UCNV_HZ);
1455#endif
1456
1457    TestConverterType("scsu", UCNV_SCSU);
1458
1459#if !UCONFIG_NO_LEGACY_CONVERSION
1460    TestConverterType("x-iscii-de", UCNV_ISCII);
1461#endif
1462
1463    TestConverterType("ascii", UCNV_US_ASCII);
1464    TestConverterType("utf-7", UCNV_UTF7);
1465    TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1466    TestConverterType("bocu-1", UCNV_BOCU1);
1467}
1468
1469static void
1470TestAmbiguousConverter(UConverter *cnv) {
1471    static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1472    UChar outUnicode[20]={ 0, 0, 0, 0 };
1473
1474    const char *s;
1475    UChar *u;
1476    UErrorCode errorCode;
1477    UBool isAmbiguous;
1478
1479    /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1480    errorCode=U_ZERO_ERROR;
1481    s=inBytes;
1482    u=outUnicode;
1483    ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1484    if(U_FAILURE(errorCode)) {
1485        /* we do not care about general failures in this test; the input may just not be mappable */
1486        return;
1487    }
1488
1489    if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1490        /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1491        /* There are some encodings that are partially ASCII based,
1492        like the ISO-7 and GSM series of codepages, which we ignore. */
1493        return;
1494    }
1495
1496    isAmbiguous=ucnv_isAmbiguous(cnv);
1497
1498    /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1499    if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1500        log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1501            ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1502        return;
1503    }
1504
1505    if(outUnicode[2]!=0x5c) {
1506        /* needs fixup, fix it */
1507        ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1508        if(outUnicode[2]!=0x5c) {
1509            /* the fix failed */
1510            log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1511            return;
1512        }
1513    }
1514}
1515
1516static void TestAmbiguous()
1517{
1518    UErrorCode status = U_ZERO_ERROR;
1519    UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1520    static const char target[] = {
1521        /* "\\usr\\local\\share\\data\\icutest.txt" */
1522        0x5c, 0x75, 0x73, 0x72,
1523        0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1524        0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1525        0x5c, 0x64, 0x61, 0x74, 0x61,
1526        0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1527        0
1528    };
1529    UChar asciiResult[200], sjisResult[200];
1530    int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1531    const char *name;
1532
1533    /* enumerate all converters */
1534    status=U_ZERO_ERROR;
1535    for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1536        cnv=ucnv_open(name, &status);
1537        if(U_SUCCESS(status)) {
1538            /* BEGIN android-changed
1539               To save space, Android does not build full ISO2022 CJK tables.
1540               We skip the tests for ISO-2022. */
1541            const char* cnvName = ucnv_getName(cnv, &status);
1542            if (strlen(cnvName) < 8 ||
1543                strncmp(cnvName, "ISO_2022", 8) != 0) {
1544                TestAmbiguousConverter(cnv);
1545            }
1546            /* END android-changed */
1547        } else {
1548            log_err("error: unable to open available converter \"%s\"\n", name);
1549            status=U_ZERO_ERROR;
1550        }
1551    }
1552
1553#if !UCONFIG_NO_LEGACY_CONVERSION
1554    sjis_cnv = ucnv_open("ibm-943", &status);
1555    if (U_FAILURE(status))
1556    {
1557        log_data_err("Failed to create a SJIS converter\n");
1558        return;
1559    }
1560    ascii_cnv = ucnv_open("LATIN-1", &status);
1561    if (U_FAILURE(status))
1562    {
1563        log_data_err("Failed to create a LATIN-1 converter\n");
1564        ucnv_close(sjis_cnv);
1565        return;
1566    }
1567    /* convert target from SJIS to Unicode */
1568    sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1569    if (U_FAILURE(status))
1570    {
1571        log_err("Failed to convert the SJIS string.\n");
1572        ucnv_close(sjis_cnv);
1573        ucnv_close(ascii_cnv);
1574        return;
1575    }
1576    /* convert target from Latin-1 to Unicode */
1577    /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1578    if (U_FAILURE(status))
1579    {
1580        log_err("Failed to convert the Latin-1 string.\n");
1581        ucnv_close(sjis_cnv);
1582        ucnv_close(ascii_cnv);
1583        return;
1584    }
1585    if (!ucnv_isAmbiguous(sjis_cnv))
1586    {
1587        log_err("SJIS converter should contain ambiguous character mappings.\n");
1588        ucnv_close(sjis_cnv);
1589        ucnv_close(ascii_cnv);
1590        return;
1591    }
1592    if (u_strcmp(sjisResult, asciiResult) == 0)
1593    {
1594        log_err("File separators for SJIS don't need to be fixed.\n");
1595    }
1596    ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1597    if (u_strcmp(sjisResult, asciiResult) != 0)
1598    {
1599        log_err("Fixing file separator for SJIS failed.\n");
1600    }
1601    ucnv_close(sjis_cnv);
1602    ucnv_close(ascii_cnv);
1603#endif
1604}
1605
1606static void
1607TestSignatureDetection(){
1608    /* with null terminated strings */
1609    {
1610        static const char* data[] = {
1611                "\xFE\xFF\x00\x00",     /* UTF-16BE */
1612                "\xFF\xFE\x00\x00",     /* UTF-16LE */
1613                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1614                "\x0E\xFE\xFF\x00",     /* SCSU     */
1615
1616                "\xFE\xFF",             /* UTF-16BE */
1617                "\xFF\xFE",             /* UTF-16LE */
1618                "\xEF\xBB\xBF",         /* UTF-8    */
1619                "\x0E\xFE\xFF",         /* SCSU     */
1620
1621                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1622                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1623                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1624                "\x0E\xFE\xFF\x41",     /* SCSU     */
1625
1626                "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1627                "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1628                "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1629                "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1630                "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1631
1632                "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1633        };
1634        static const char* expected[] = {
1635                "UTF-16BE",
1636                "UTF-16LE",
1637                "UTF-8",
1638                "SCSU",
1639
1640                "UTF-16BE",
1641                "UTF-16LE",
1642                "UTF-8",
1643                "SCSU",
1644
1645                "UTF-16BE",
1646                "UTF-16LE",
1647                "UTF-8",
1648                "SCSU",
1649
1650                "UTF-7",
1651                "UTF-7",
1652                "UTF-7",
1653                "UTF-7",
1654                "UTF-7",
1655                "UTF-EBCDIC"
1656        };
1657        static const int32_t expectedLength[] ={
1658            2,
1659            2,
1660            3,
1661            3,
1662
1663            2,
1664            2,
1665            3,
1666            3,
1667
1668            2,
1669            2,
1670            3,
1671            3,
1672
1673            5,
1674            4,
1675            4,
1676            4,
1677            4,
1678            4
1679        };
1680        int i=0;
1681        UErrorCode err;
1682        int32_t signatureLength = -1;
1683        const char* source = NULL;
1684        const char* enc = NULL;
1685        for( ; i<sizeof(data)/sizeof(char*); i++){
1686            err = U_ZERO_ERROR;
1687            source = data[i];
1688            enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1689            if(U_FAILURE(err)){
1690                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1691                continue;
1692            }
1693            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1694                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1695                continue;
1696            }
1697            if(signatureLength != expectedLength[i]){
1698                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1699            }
1700        }
1701    }
1702    {
1703        static const char* data[] = {
1704                "\xFE\xFF\x00",         /* UTF-16BE */
1705                "\xFF\xFE\x00",         /* UTF-16LE */
1706                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1707                "\x0E\xFE\xFF\x00",     /* SCSU     */
1708                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1709                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1710                "\xFE\xFF",             /* UTF-16BE */
1711                "\xFF\xFE",             /* UTF-16LE */
1712                "\xEF\xBB\xBF",         /* UTF-8    */
1713                "\x0E\xFE\xFF",         /* SCSU     */
1714                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1715                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1716                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1717                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1718                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1719                "\x0E\xFE\xFF\x41",     /* SCSU     */
1720                "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1721                "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1722                "\xFB\xEE\x28",         /* BOCU-1   */
1723                "\xFF\x41\x42"          /* NULL     */
1724        };
1725        static const int len[] = {
1726            3,
1727            3,
1728            4,
1729            4,
1730            4,
1731            4,
1732            2,
1733            2,
1734            3,
1735            3,
1736            4,
1737            4,
1738            4,
1739            4,
1740            4,
1741            4,
1742            5,
1743            5,
1744            3,
1745            3
1746        };
1747
1748        static const char* expected[] = {
1749                "UTF-16BE",
1750                "UTF-16LE",
1751                "UTF-8",
1752                "SCSU",
1753                "UTF-32BE",
1754                "UTF-32LE",
1755                "UTF-16BE",
1756                "UTF-16LE",
1757                "UTF-8",
1758                "SCSU",
1759                "UTF-32BE",
1760                "UTF-32LE",
1761                "UTF-16BE",
1762                "UTF-16LE",
1763                "UTF-8",
1764                "SCSU",
1765                "UTF-32BE",
1766                "UTF-32LE",
1767                "BOCU-1",
1768                NULL
1769        };
1770        static const int32_t expectedLength[] ={
1771            2,
1772            2,
1773            3,
1774            3,
1775            4,
1776            4,
1777            2,
1778            2,
1779            3,
1780            3,
1781            4,
1782            4,
1783            2,
1784            2,
1785            3,
1786            3,
1787            4,
1788            4,
1789            3,
1790            0
1791        };
1792        int i=0;
1793        UErrorCode err;
1794        int32_t signatureLength = -1;
1795        int32_t sourceLength=-1;
1796        const char* source = NULL;
1797        const char* enc = NULL;
1798        for( ; i<sizeof(data)/sizeof(char*); i++){
1799            err = U_ZERO_ERROR;
1800            source = data[i];
1801            sourceLength = len[i];
1802            enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1803            if(U_FAILURE(err)){
1804                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1805                continue;
1806            }
1807            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1808                if(expected[i] !=NULL){
1809                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1810                 continue;
1811                }
1812            }
1813            if(signatureLength != expectedLength[i]){
1814                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1815            }
1816        }
1817    }
1818}
1819
1820static void TestUTF7() {
1821    /* test input */
1822    static const uint8_t in[]={
1823        /* H - +Jjo- - ! +- +2AHcAQ */
1824        0x48,
1825        0x2d,
1826        0x2b, 0x4a, 0x6a, 0x6f,
1827        0x2d, 0x2d,
1828        0x21,
1829        0x2b, 0x2d,
1830        0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1831    };
1832
1833    /* expected test results */
1834    static const int32_t results[]={
1835        /* number of bytes read, code point */
1836        1, 0x48,
1837        1, 0x2d,
1838        4, 0x263a, /* <WHITE SMILING FACE> */
1839        2, 0x2d,
1840        1, 0x21,
1841        2, 0x2b,
1842        7, 0x10401
1843    };
1844
1845    const char *cnvName;
1846    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1847    UErrorCode errorCode=U_ZERO_ERROR;
1848    UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1849    if(U_FAILURE(errorCode)) {
1850        log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1851        return;
1852    }
1853    TestNextUChar(cnv, source, limit, results, "UTF-7");
1854    /* Test the condition when source >= sourceLimit */
1855    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1856    cnvName = ucnv_getName(cnv, &errorCode);
1857    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1858        log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1859    }
1860    ucnv_close(cnv);
1861}
1862
1863static void TestIMAP() {
1864    /* test input */
1865    static const uint8_t in[]={
1866        /* H - &Jjo- - ! &- &2AHcAQ- \ */
1867        0x48,
1868        0x2d,
1869        0x26, 0x4a, 0x6a, 0x6f,
1870        0x2d, 0x2d,
1871        0x21,
1872        0x26, 0x2d,
1873        0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1874    };
1875
1876    /* expected test results */
1877    static const int32_t results[]={
1878        /* number of bytes read, code point */
1879        1, 0x48,
1880        1, 0x2d,
1881        4, 0x263a, /* <WHITE SMILING FACE> */
1882        2, 0x2d,
1883        1, 0x21,
1884        2, 0x26,
1885        7, 0x10401
1886    };
1887
1888    const char *cnvName;
1889    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1890    UErrorCode errorCode=U_ZERO_ERROR;
1891    UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1892    if(U_FAILURE(errorCode)) {
1893        log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1894        return;
1895    }
1896    TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1897    /* Test the condition when source >= sourceLimit */
1898    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1899    cnvName = ucnv_getName(cnv, &errorCode);
1900    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1901        log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1902    }
1903    ucnv_close(cnv);
1904}
1905
1906static void TestUTF8() {
1907    /* test input */
1908    static const uint8_t in[]={
1909        0x61,
1910        0xc2, 0x80,
1911        0xe0, 0xa0, 0x80,
1912        0xf0, 0x90, 0x80, 0x80,
1913        0xf4, 0x84, 0x8c, 0xa1,
1914        0xf0, 0x90, 0x90, 0x81
1915    };
1916
1917    /* expected test results */
1918    static const int32_t results[]={
1919        /* number of bytes read, code point */
1920        1, 0x61,
1921        2, 0x80,
1922        3, 0x800,
1923        4, 0x10000,
1924        4, 0x104321,
1925        4, 0x10401
1926    };
1927
1928    /* error test input */
1929    static const uint8_t in2[]={
1930        0x61,
1931        0xc0, 0x80,                     /* illegal non-shortest form */
1932        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1933        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1934        0xc0, 0xc0,                     /* illegal trail byte */
1935        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1936        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1937        0xfe,                           /* illegal byte altogether */
1938        0x62
1939    };
1940
1941    /* expected error test results */
1942    static const int32_t results2[]={
1943        /* number of bytes read, code point */
1944        1, 0x61,
1945        22, 0x62
1946    };
1947
1948    UConverterToUCallback cb;
1949    const void *p;
1950
1951    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1952    UErrorCode errorCode=U_ZERO_ERROR;
1953    UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1954    if(U_FAILURE(errorCode)) {
1955        log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1956        return;
1957    }
1958    TestNextUChar(cnv, source, limit, results, "UTF-8");
1959    /* Test the condition when source >= sourceLimit */
1960    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1961
1962    /* test error behavior with a skip callback */
1963    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1964    source=(const char *)in2;
1965    limit=(const char *)(in2+sizeof(in2));
1966    TestNextUChar(cnv, source, limit, results2, "UTF-8");
1967
1968    ucnv_close(cnv);
1969}
1970
1971static void TestCESU8() {
1972    /* test input */
1973    static const uint8_t in[]={
1974        0x61,
1975        0xc2, 0x80,
1976        0xe0, 0xa0, 0x80,
1977        0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1978        0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1979        0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1980        0xef, 0xbf, 0xbc
1981    };
1982
1983    /* expected test results */
1984    static const int32_t results[]={
1985        /* number of bytes read, code point */
1986        1, 0x61,
1987        2, 0x80,
1988        3, 0x800,
1989        6, 0x10000,
1990        3, 0xdc01,
1991        -1,0xd802,  /* may read 3 or 6 bytes */
1992        -1,0x10ffff,/* may read 0 or 3 bytes */
1993        3, 0xfffc
1994    };
1995
1996    /* error test input */
1997    static const uint8_t in2[]={
1998        0x61,
1999        0xc0, 0x80,                     /* illegal non-shortest form */
2000        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
2001        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
2002        0xc0, 0xc0,                     /* illegal trail byte */
2003        0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
2004        0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
2005        0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
2006        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
2007        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
2008        0xfe,                           /* illegal byte altogether */
2009        0x62
2010    };
2011
2012    /* expected error test results */
2013    static const int32_t results2[]={
2014        /* number of bytes read, code point */
2015        1, 0x61,
2016        34, 0x62
2017    };
2018
2019    UConverterToUCallback cb;
2020    const void *p;
2021
2022    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2023    UErrorCode errorCode=U_ZERO_ERROR;
2024    UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2025    if(U_FAILURE(errorCode)) {
2026        log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2027        return;
2028    }
2029    TestNextUChar(cnv, source, limit, results, "CESU-8");
2030    /* Test the condition when source >= sourceLimit */
2031    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2032
2033    /* test error behavior with a skip callback */
2034    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2035    source=(const char *)in2;
2036    limit=(const char *)(in2+sizeof(in2));
2037    TestNextUChar(cnv, source, limit, results2, "CESU-8");
2038
2039    ucnv_close(cnv);
2040}
2041
2042static void TestUTF16() {
2043    /* test input */
2044    static const uint8_t in1[]={
2045        0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2046    };
2047    static const uint8_t in2[]={
2048        0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2049    };
2050    static const uint8_t in3[]={
2051        0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2052    };
2053
2054    /* expected test results */
2055    static const int32_t results1[]={
2056        /* number of bytes read, code point */
2057        4, 0x4e00,
2058        2, 0xfeff
2059    };
2060    static const int32_t results2[]={
2061        /* number of bytes read, code point */
2062        4, 0x004e,
2063        2, 0xfffe
2064    };
2065    static const int32_t results3[]={
2066        /* number of bytes read, code point */
2067        2, 0xfefe,
2068        2, 0x4e00,
2069        2, 0xfeff,
2070        4, 0x20001
2071    };
2072
2073    const char *source, *limit;
2074
2075    UErrorCode errorCode=U_ZERO_ERROR;
2076    UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2077    if(U_FAILURE(errorCode)) {
2078        log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2079        return;
2080    }
2081
2082    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2083    TestNextUChar(cnv, source, limit, results1, "UTF-16");
2084
2085    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2086    ucnv_resetToUnicode(cnv);
2087    TestNextUChar(cnv, source, limit, results2, "UTF-16");
2088
2089    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2090    ucnv_resetToUnicode(cnv);
2091    TestNextUChar(cnv, source, limit, results3, "UTF-16");
2092
2093    /* Test the condition when source >= sourceLimit */
2094    ucnv_resetToUnicode(cnv);
2095    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2096
2097    ucnv_close(cnv);
2098}
2099
2100static void TestUTF16BE() {
2101    /* test input */
2102    static const uint8_t in[]={
2103        0x00, 0x61,
2104        0x00, 0xc0,
2105        0x00, 0x31,
2106        0x00, 0xf4,
2107        0xce, 0xfe,
2108        0xd8, 0x01, 0xdc, 0x01
2109    };
2110
2111    /* expected test results */
2112    static const int32_t results[]={
2113        /* number of bytes read, code point */
2114        2, 0x61,
2115        2, 0xc0,
2116        2, 0x31,
2117        2, 0xf4,
2118        2, 0xcefe,
2119        4, 0x10401
2120    };
2121
2122    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2123    UErrorCode errorCode=U_ZERO_ERROR;
2124    UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2125    if(U_FAILURE(errorCode)) {
2126        log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2127        return;
2128    }
2129    TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2130    /* Test the condition when source >= sourceLimit */
2131    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2132    /*Test for the condition where there is an invalid character*/
2133    {
2134        static const uint8_t source2[]={0x61};
2135        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2136        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2137    }
2138#if 0
2139    /*
2140     * Test disabled because currently the UTF-16BE/LE converters are supposed
2141     * to not set errors for unpaired surrogates.
2142     * This may change with
2143     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2144     */
2145
2146    /*Test for the condition where there is a surrogate pair*/
2147    {
2148        const uint8_t source2[]={0xd8, 0x01};
2149        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2150    }
2151#endif
2152    ucnv_close(cnv);
2153}
2154
2155static void
2156TestUTF16LE() {
2157    /* test input */
2158    static const uint8_t in[]={
2159        0x61, 0x00,
2160        0x31, 0x00,
2161        0x4e, 0x2e,
2162        0x4e, 0x00,
2163        0x01, 0xd8, 0x01, 0xdc
2164    };
2165
2166    /* expected test results */
2167    static const int32_t results[]={
2168        /* number of bytes read, code point */
2169        2, 0x61,
2170        2, 0x31,
2171        2, 0x2e4e,
2172        2, 0x4e,
2173        4, 0x10401
2174    };
2175
2176    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2177    UErrorCode errorCode=U_ZERO_ERROR;
2178    UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2179    if(U_FAILURE(errorCode)) {
2180        log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2181        return;
2182    }
2183    TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2184    /* Test the condition when source >= sourceLimit */
2185    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2186    /*Test for the condition where there is an invalid character*/
2187    {
2188        static const uint8_t source2[]={0x61};
2189        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2190        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2191    }
2192#if 0
2193    /*
2194     * Test disabled because currently the UTF-16BE/LE converters are supposed
2195     * to not set errors for unpaired surrogates.
2196     * This may change with
2197     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2198     */
2199
2200    /*Test for the condition where there is a surrogate character*/
2201    {
2202        static const uint8_t source2[]={0x01, 0xd8};
2203        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2204    }
2205#endif
2206
2207    ucnv_close(cnv);
2208}
2209
2210static void TestUTF32() {
2211    /* test input */
2212    static const uint8_t in1[]={
2213        0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2214    };
2215    static const uint8_t in2[]={
2216        0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2217    };
2218    static const uint8_t in3[]={
2219        0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2220    };
2221
2222    /* expected test results */
2223    static const int32_t results1[]={
2224        /* number of bytes read, code point */
2225        8, 0x100f00,
2226        4, 0xfeff
2227    };
2228    static const int32_t results2[]={
2229        /* number of bytes read, code point */
2230        8, 0x0f1000,
2231        4, 0xfffe
2232    };
2233    static const int32_t results3[]={
2234        /* number of bytes read, code point */
2235        4, 0xfefe,
2236        4, 0x100f00,
2237        4, 0xfffd, /* unmatched surrogate */
2238        4, 0xfffd  /* unmatched surrogate */
2239    };
2240
2241    const char *source, *limit;
2242
2243    UErrorCode errorCode=U_ZERO_ERROR;
2244    UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2245    if(U_FAILURE(errorCode)) {
2246        log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2247        return;
2248    }
2249
2250    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2251    TestNextUChar(cnv, source, limit, results1, "UTF-32");
2252
2253    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2254    ucnv_resetToUnicode(cnv);
2255    TestNextUChar(cnv, source, limit, results2, "UTF-32");
2256
2257    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2258    ucnv_resetToUnicode(cnv);
2259    TestNextUChar(cnv, source, limit, results3, "UTF-32");
2260
2261    /* Test the condition when source >= sourceLimit */
2262    ucnv_resetToUnicode(cnv);
2263    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2264
2265    ucnv_close(cnv);
2266}
2267
2268static void
2269TestUTF32BE() {
2270    /* test input */
2271    static const uint8_t in[]={
2272        0x00, 0x00, 0x00, 0x61,
2273        0x00, 0x00, 0x30, 0x61,
2274        0x00, 0x00, 0xdc, 0x00,
2275        0x00, 0x00, 0xd8, 0x00,
2276        0x00, 0x00, 0xdf, 0xff,
2277        0x00, 0x00, 0xff, 0xfe,
2278        0x00, 0x10, 0xab, 0xcd,
2279        0x00, 0x10, 0xff, 0xff
2280    };
2281
2282    /* expected test results */
2283    static const int32_t results[]={
2284        /* number of bytes read, code point */
2285        4, 0x61,
2286        4, 0x3061,
2287        4, 0xfffd,
2288        4, 0xfffd,
2289        4, 0xfffd,
2290        4, 0xfffe,
2291        4, 0x10abcd,
2292        4, 0x10ffff
2293    };
2294
2295    /* error test input */
2296    static const uint8_t in2[]={
2297        0x00, 0x00, 0x00, 0x61,
2298        0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2299        0x00, 0x00, 0x00, 0x62,
2300        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2301        0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2302        0x00, 0x00, 0x01, 0x62,
2303        0x00, 0x00, 0x02, 0x62
2304    };
2305
2306    /* expected error test results */
2307    static const int32_t results2[]={
2308        /* number of bytes read, code point */
2309        4,  0x61,
2310        8,  0x62,
2311        12, 0x162,
2312        4,  0x262
2313    };
2314
2315    UConverterToUCallback cb;
2316    const void *p;
2317
2318    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2319    UErrorCode errorCode=U_ZERO_ERROR;
2320    UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2321    if(U_FAILURE(errorCode)) {
2322        log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2323        return;
2324    }
2325    TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2326
2327    /* Test the condition when source >= sourceLimit */
2328    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2329
2330    /* test error behavior with a skip callback */
2331    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2332    source=(const char *)in2;
2333    limit=(const char *)(in2+sizeof(in2));
2334    TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2335
2336    ucnv_close(cnv);
2337}
2338
2339static void
2340TestUTF32LE() {
2341    /* test input */
2342    static const uint8_t in[]={
2343        0x61, 0x00, 0x00, 0x00,
2344        0x61, 0x30, 0x00, 0x00,
2345        0x00, 0xdc, 0x00, 0x00,
2346        0x00, 0xd8, 0x00, 0x00,
2347        0xff, 0xdf, 0x00, 0x00,
2348        0xfe, 0xff, 0x00, 0x00,
2349        0xcd, 0xab, 0x10, 0x00,
2350        0xff, 0xff, 0x10, 0x00
2351    };
2352
2353    /* expected test results */
2354    static const int32_t results[]={
2355        /* number of bytes read, code point */
2356        4, 0x61,
2357        4, 0x3061,
2358        4, 0xfffd,
2359        4, 0xfffd,
2360        4, 0xfffd,
2361        4, 0xfffe,
2362        4, 0x10abcd,
2363        4, 0x10ffff
2364    };
2365
2366    /* error test input */
2367    static const uint8_t in2[]={
2368        0x61, 0x00, 0x00, 0x00,
2369        0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2370        0x62, 0x00, 0x00, 0x00,
2371        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2372        0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2373        0x62, 0x01, 0x00, 0x00,
2374        0x62, 0x02, 0x00, 0x00,
2375    };
2376
2377    /* expected error test results */
2378    static const int32_t results2[]={
2379        /* number of bytes read, code point */
2380        4,  0x61,
2381        8,  0x62,
2382        12, 0x162,
2383        4,  0x262,
2384    };
2385
2386    UConverterToUCallback cb;
2387    const void *p;
2388
2389    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2390    UErrorCode errorCode=U_ZERO_ERROR;
2391    UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2392    if(U_FAILURE(errorCode)) {
2393        log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2394        return;
2395    }
2396    TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2397
2398    /* Test the condition when source >= sourceLimit */
2399    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2400
2401    /* test error behavior with a skip callback */
2402    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2403    source=(const char *)in2;
2404    limit=(const char *)(in2+sizeof(in2));
2405    TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2406
2407    ucnv_close(cnv);
2408}
2409
2410static void
2411TestLATIN1() {
2412    /* test input */
2413    static const uint8_t in[]={
2414       0x61,
2415       0x31,
2416       0x32,
2417       0xc0,
2418       0xf0,
2419       0xf4,
2420    };
2421
2422    /* expected test results */
2423    static const int32_t results[]={
2424        /* number of bytes read, code point */
2425        1, 0x61,
2426        1, 0x31,
2427        1, 0x32,
2428        1, 0xc0,
2429        1, 0xf0,
2430        1, 0xf4,
2431    };
2432    static const uint16_t in1[] = {
2433        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2434        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2435        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2436        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2437        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2438        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2439        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2440        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2441        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2442        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2443        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2444        0xcb, 0x82
2445    };
2446    static const uint8_t out1[] = {
2447        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2448        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2449        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2450        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2451        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2452        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2453        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2454        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2455        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2456        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2457        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2458        0xcb, 0x82
2459    };
2460    static const uint16_t in2[]={
2461        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2462        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2463        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2464        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2465        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2466        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2467        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2468        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2469        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2470        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2471        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2472        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2473        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2474        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2475        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2476        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2477        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2478        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2479        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2480        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2481        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2482        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2483        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2484        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2485        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2486        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2487        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2488        0x37, 0x20, 0x2A, 0x2F,
2489    };
2490    static const unsigned char out2[]={
2491        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2492        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2493        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2494        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2495        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2496        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2497        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2498        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2499        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2500        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2501        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2502        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2503        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2504        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2505        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2506        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2507        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2508        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2509        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2510        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2511        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2512        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2513        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2514        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2515        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2516        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2517        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2518        0x37, 0x20, 0x2A, 0x2F,
2519    };
2520    const char *source=(const char *)in;
2521    const char *limit=(const char *)in+sizeof(in);
2522
2523    UErrorCode errorCode=U_ZERO_ERROR;
2524    UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2525    if(U_FAILURE(errorCode)) {
2526        log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2527        return;
2528    }
2529    TestNextUChar(cnv, source, limit, results, "LATIN_1");
2530    /* Test the condition when source >= sourceLimit */
2531    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2532    TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2533    TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2534
2535    ucnv_close(cnv);
2536}
2537
2538static void
2539TestSBCS() {
2540    /* test input */
2541    static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2542    /* expected test results */
2543    static const int32_t results[]={
2544        /* number of bytes read, code point */
2545        1, 0x61,
2546        1, 0xbf,
2547        1, 0xc4,
2548        1, 0x2021,
2549        1, 0xf8ff,
2550        1, 0x00d9
2551    };
2552
2553    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2554    UErrorCode errorCode=U_ZERO_ERROR;
2555    UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2556    if(U_FAILURE(errorCode)) {
2557        log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2558        return;
2559    }
2560    TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2561    /* Test the condition when source >= sourceLimit */
2562    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2563    /*Test for Illegal character */ /*
2564    {
2565    static const uint8_t input1[]={ 0xA1 };
2566    const char* illegalsource=(const char*)input1;
2567    TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2568    }
2569   */
2570    ucnv_close(cnv);
2571}
2572
2573static void
2574TestDBCS() {
2575    /* test input */
2576    static const uint8_t in[]={
2577        0x44, 0x6a,
2578        0xc4, 0x9c,
2579        0x7a, 0x74,
2580        0x46, 0xab,
2581        0x42, 0x5b,
2582
2583    };
2584
2585    /* expected test results */
2586    static const int32_t results[]={
2587        /* number of bytes read, code point */
2588        2, 0x00a7,
2589        2, 0xe1d2,
2590        2, 0x6962,
2591        2, 0xf842,
2592        2, 0xffe5,
2593    };
2594
2595    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2596    UErrorCode errorCode=U_ZERO_ERROR;
2597
2598    UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2599    if(U_FAILURE(errorCode)) {
2600        log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2601        return;
2602    }
2603    TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2604    /* Test the condition when source >= sourceLimit */
2605    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2606    /*Test for the condition where there is an invalid character*/
2607    {
2608        static const uint8_t source2[]={0x1a, 0x1b};
2609        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2610    }
2611    /*Test for the condition where we have a truncated char*/
2612    {
2613        static const uint8_t source1[]={0xc4};
2614        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2615        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2616    }
2617    ucnv_close(cnv);
2618}
2619
2620static void
2621TestMBCS() {
2622    /* test input */
2623    static const uint8_t in[]={
2624        0x01,
2625        0xa6, 0xa3,
2626        0x00,
2627        0xa6, 0xa1,
2628        0x08,
2629        0xc2, 0x76,
2630        0xc2, 0x78,
2631
2632    };
2633
2634    /* expected test results */
2635    static const int32_t results[]={
2636        /* number of bytes read, code point */
2637        1, 0x0001,
2638        2, 0x250c,
2639        1, 0x0000,
2640        2, 0x2500,
2641        1, 0x0008,
2642        2, 0xd60c,
2643        2, 0xd60e,
2644    };
2645
2646    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2647    UErrorCode errorCode=U_ZERO_ERROR;
2648
2649    UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2650    if(U_FAILURE(errorCode)) {
2651        log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2652        return;
2653    }
2654    TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2655    /* Test the condition when source >= sourceLimit */
2656    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2657    /*Test for the condition where there is an invalid character*/
2658    {
2659        static const uint8_t source2[]={0xa1, 0x80};
2660        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2661    }
2662    /*Test for the condition where we have a truncated char*/
2663    {
2664        static const uint8_t source1[]={0xc4};
2665        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2666        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2667    }
2668    ucnv_close(cnv);
2669
2670}
2671
2672#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2673static void
2674TestICCRunout() {
2675/*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2676
2677    const char *cnvName = "ibm-1363";
2678    UErrorCode status = U_ZERO_ERROR;
2679    const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2680    /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2681    const char *source = sourceData;
2682    const char *sourceLim = sourceData+sizeof(sourceData);
2683    UChar c1, c2, c3;
2684    UConverter *cnv=ucnv_open(cnvName, &status);
2685    if(U_FAILURE(status)) {
2686        log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2687	return;
2688    }
2689
2690#if 0
2691    {
2692    UChar   targetBuf[256];
2693    UChar   *target = targetBuf;
2694    UChar   *targetLim = target+256;
2695    ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2696
2697    log_info("After convert: target@%d, source@%d, status%s\n",
2698	     target-targetBuf, source-sourceData, u_errorName(status));
2699
2700    if(U_FAILURE(status)) {
2701	log_err("Failed to convert: %s\n", u_errorName(status));
2702    } else {
2703
2704    }
2705    }
2706#endif
2707
2708    c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2709    log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2710
2711    c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2712    log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2713
2714    c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2715    log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2716
2717    if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2718	log_verbose("OK\n");
2719    } else {
2720	log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2721    }
2722
2723    ucnv_close(cnv);
2724
2725}
2726#endif
2727
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Runs the ucnv_getNextUChar() helper checks against the generic ISO_2022
 * converter; compiled only when U_ENABLE_GENERIC_ISO_2022 is defined.
 */
static void
TestISO_2022() {
    /* input bytes: a three-byte escape sequence followed by 1- to 4-byte characters */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };

    /* expected outcome, as pairs of: number of bytes read, code point */
    static const int32_t results[]={
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    /* a lone byte used for the truncated-character case */
    static const uint8_t truncatedBytes[]={0xc4};
    /* input for the "invalid character" case */
    static const uint8_t invalidBytes[]={0xa1, 0x01};

    const char *source=(const char *)in;
    const char *limit=source+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /* a limit before the source, and then an empty range */
    TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* the condition where we have a truncated char; the STOP callback is set first */
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
    TestNextUCharError(cnv,
                       (const char*)truncatedBytes,
                       (const char*)truncatedBytes+sizeof(truncatedBytes),
                       U_TRUNCATED_CHAR_FOUND, "a character is truncated");

    /* the condition where there is an invalid character */
    TestNextUCharError(cnv,
                       (const char*)invalidBytes,
                       (const char*)invalidBytes+sizeof(invalidBytes),
                       U_ILLEGAL_CHAR_FOUND, "an invalid character");

    ucnv_close(cnv);
}

#endif
2785
2786static void
2787TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2788    const UChar* uSource;
2789    const UChar* uSourceLimit;
2790    const char* cSource;
2791    const char* cSourceLimit;
2792    UChar *uTargetLimit =NULL;
2793    UChar *uTarget;
2794    char *cTarget;
2795    const char *cTargetLimit;
2796    char *cBuf;
2797    UChar *uBuf; /*,*test;*/
2798    int32_t uBufSize = 120;
2799    int len=0;
2800    int i=2;
2801    UErrorCode errorCode=U_ZERO_ERROR;
2802    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2803    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2804    ucnv_reset(cnv);
2805    for(;--i>0; ){
2806        uSource = (UChar*) source;
2807        uSourceLimit=(const UChar*)sourceLimit;
2808        cTarget = cBuf;
2809        uTarget = uBuf;
2810        cSource = cBuf;
2811        cTargetLimit = cBuf;
2812        uTargetLimit = uBuf;
2813
2814        do{
2815
2816            cTargetLimit = cTargetLimit+ i;
2817            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2818            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2819               errorCode=U_ZERO_ERROR;
2820                continue;
2821            }
2822
2823            if(U_FAILURE(errorCode)){
2824                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2825                return;
2826            }
2827
2828        }while (uSource<uSourceLimit);
2829
2830        cSourceLimit =cTarget;
2831        do{
2832            uTargetLimit=uTargetLimit+i;
2833            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2834            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2835               errorCode=U_ZERO_ERROR;
2836                continue;
2837            }
2838            if(U_FAILURE(errorCode)){
2839                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2840                    return;
2841            }
2842        }while(cSource<cSourceLimit);
2843
2844        uSource = source;
2845        /*test =uBuf;*/
2846        for(len=0;len<(int)(source - sourceLimit);len++){
2847            if(uBuf[len]!=uSource[len]){
2848                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2849            }
2850        }
2851    }
2852    free(uBuf);
2853    free(cBuf);
2854}
2855/* Test for Jitterbug 778 */
2856static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2857    const UChar* uSource;
2858    const UChar* uSourceLimit;
2859    const char* cSource;
2860    UChar *uTargetLimit =NULL;
2861    UChar *uTarget;
2862    char *cTarget;
2863    const char *cTargetLimit;
2864    char *cBuf;
2865    UChar *uBuf,*test;
2866    int32_t uBufSize = 120;
2867    int numCharsInTarget=0;
2868    UErrorCode errorCode=U_ZERO_ERROR;
2869    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2870    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2871    uSource = source;
2872    uSourceLimit=sourceLimit;
2873    cTarget = cBuf;
2874    cTargetLimit = cBuf +uBufSize*5;
2875    uTarget = uBuf;
2876    uTargetLimit = uBuf+ uBufSize*5;
2877    ucnv_reset(cnv);
2878    numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2879    if(U_FAILURE(errorCode)){
2880        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2881        return;
2882    }
2883    cSource = cBuf;
2884    test =uBuf;
2885    ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2886    if(U_FAILURE(errorCode)){
2887        log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2888        return;
2889    }
2890    uSource = source;
2891    while(uSource<uSourceLimit){
2892        if(*test!=*uSource){
2893
2894            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2895        }
2896        uSource++;
2897        test++;
2898    }
2899    free(uBuf);
2900    free(cBuf);
2901}
2902
2903static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2904    const UChar* uSource;
2905    const UChar* uSourceLimit;
2906    const char* cSource;
2907    const char* cSourceLimit;
2908    UChar *uTargetLimit =NULL;
2909    UChar *uTarget;
2910    char *cTarget;
2911    const char *cTargetLimit;
2912    char *cBuf;
2913    UChar *uBuf; /*,*test;*/
2914    int32_t uBufSize = 120;
2915    int len=0;
2916    int i=2;
2917    const UChar *temp = sourceLimit;
2918    UErrorCode errorCode=U_ZERO_ERROR;
2919    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2920    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2921
2922    ucnv_reset(cnv);
2923    for(;--i>0;){
2924        uSource = (UChar*) source;
2925        cTarget = cBuf;
2926        uTarget = uBuf;
2927        cSource = cBuf;
2928        cTargetLimit = cBuf;
2929        uTargetLimit = uBuf+uBufSize*5;
2930        cTargetLimit = cTargetLimit+uBufSize*10;
2931        uSourceLimit=uSource;
2932        do{
2933
2934            if (uSourceLimit < sourceLimit) {
2935                uSourceLimit = uSourceLimit+1;
2936            }
2937            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2938            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2939               errorCode=U_ZERO_ERROR;
2940                continue;
2941            }
2942
2943            if(U_FAILURE(errorCode)){
2944                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2945                return;
2946            }
2947
2948        }while (uSource<temp);
2949
2950        cSourceLimit =cBuf;
2951        do{
2952            if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2953                cSourceLimit = cSourceLimit+1;
2954            }
2955            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2956            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2957               errorCode=U_ZERO_ERROR;
2958                continue;
2959            }
2960            if(U_FAILURE(errorCode)){
2961                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2962                    return;
2963            }
2964        }while(cSource<cTarget);
2965
2966        uSource = source;
2967        /*test =uBuf;*/
2968        for(;len<(int)(source - sourceLimit);len++){
2969            if(uBuf[len]!=uSource[len]){
2970                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2971            }
2972        }
2973    }
2974    free(uBuf);
2975    free(cBuf);
2976}
2977static void
2978TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2979                     const uint16_t results[], const char* message){
2980/*     const char* s0; */
2981     const char* s=(char*)source;
2982     const uint16_t *r=results;
2983     UErrorCode errorCode=U_ZERO_ERROR;
2984     uint32_t c,exC;
2985     ucnv_reset(cnv);
2986     while(s<limit) {
2987	 /* s0=s; */
2988        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2989        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2990            break; /* no more significant input */
2991        } else if(U_FAILURE(errorCode)) {
2992            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2993            break;
2994        } else {
2995            if(UTF_IS_FIRST_SURROGATE(*r)){
2996                int i =0, len = 2;
2997                UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
2998                r++;
2999            }else{
3000                exC = *r;
3001            }
3002            if(c!=(uint32_t)(exC))
3003                log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
3004        }
3005        r++;
3006    }
3007}
3008
3009static int TestJitterbug930(const char* enc){
3010    UErrorCode err = U_ZERO_ERROR;
3011    UConverter*converter;
3012    char out[80];
3013    char*target = out;
3014    UChar in[4];
3015    const UChar*source = in;
3016    int32_t off[80];
3017    int32_t* offsets = off;
3018    int numOffWritten=0;
3019    UBool flush = 0;
3020    converter = my_ucnv_open(enc, &err);
3021
3022    in[0] = 0x41;     /* 0x4E00;*/
3023    in[1] = 0x4E01;
3024    in[2] = 0x4E02;
3025    in[3] = 0x4E03;
3026
3027    memset(off, '*', sizeof(off));
3028
3029    ucnv_fromUnicode (converter,
3030            &target,
3031            target+2,
3032            &source,
3033            source+3,
3034            offsets,
3035            flush,
3036            &err);
3037
3038        /* writes three bytes into the output buffer: 41 1B 24
3039        * but offsets contains 0 1 1
3040    */
3041    while(*offsets< off[10]){
3042        numOffWritten++;
3043        offsets++;
3044    }
3045    log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3046    if(numOffWritten!= (int)(target-out)){
3047        log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3048    }
3049
3050    err = U_ZERO_ERROR;
3051
3052    memset(off,'*' , sizeof(off));
3053
3054    flush = 1;
3055    offsets=off;
3056    ucnv_fromUnicode (converter,
3057            &target,
3058            target+4,
3059            &source,
3060            source,
3061            offsets,
3062            flush,
3063            &err);
3064    numOffWritten=0;
3065    while(*offsets< off[10]){
3066        numOffWritten++;
3067        if(*offsets!= -1){
3068            log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3069        }
3070        offsets++;
3071    }
3072
3073    /* writes 42 43 7A into output buffer,
3074     * offsets contains -1 -1 -1
3075     */
3076    ucnv_close(converter);
3077    return 0;
3078}
3079
3080static void
3081TestHZ() {
3082    /* test input */
3083    static const uint16_t in[]={
3084            0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3085            0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3086            0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3087            0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3088            0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3089            0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3090            0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3091            0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3092            0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3093            0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3094            0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3095            0x005A, 0x005B, 0x005C, 0x000A
3096      };
3097    const UChar* uSource;
3098    const UChar* uSourceLimit;
3099    const char* cSource;
3100    const char* cSourceLimit;
3101    UChar *uTargetLimit =NULL;
3102    UChar *uTarget;
3103    char *cTarget;
3104    const char *cTargetLimit;
3105    char *cBuf;
3106    UChar *uBuf,*test;
3107    int32_t uBufSize = 120;
3108    UErrorCode errorCode=U_ZERO_ERROR;
3109    UConverter *cnv;
3110    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3111    int32_t* myOff= offsets;
3112    cnv=ucnv_open("HZ", &errorCode);
3113    if(U_FAILURE(errorCode)) {
3114        log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3115        return;
3116    }
3117
3118    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3119    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3120    uSource = (const UChar*)in;
3121    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3122    cTarget = cBuf;
3123    cTargetLimit = cBuf +uBufSize*5;
3124    uTarget = uBuf;
3125    uTargetLimit = uBuf+ uBufSize*5;
3126    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3127    if(U_FAILURE(errorCode)){
3128        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3129        return;
3130    }
3131    cSource = cBuf;
3132    cSourceLimit =cTarget;
3133    test =uBuf;
3134    myOff=offsets;
3135    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3136    if(U_FAILURE(errorCode)){
3137        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3138        return;
3139    }
3140    uSource = (const UChar*)in;
3141    while(uSource<uSourceLimit){
3142        if(*test!=*uSource){
3143
3144            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3145        }
3146        uSource++;
3147        test++;
3148    }
3149    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3150    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3151    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3152    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3153    TestJitterbug930("csISO2022JP");
3154    ucnv_close(cnv);
3155    free(offsets);
3156    free(uBuf);
3157    free(cBuf);
3158}
3159
3160static void
3161TestISCII(){
3162        /* test input */
3163    static const uint16_t in[]={
3164        /* test full range of Devanagari */
3165        0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3166        0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3167        0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3168        0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3169        0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3170        0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3171        0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3172        0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3173        0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3174        0x096D,0x096E,0x096F,
3175        /* test Soft halant*/
3176        0x0915,0x094d, 0x200D,
3177        /* test explicit halant */
3178        0x0915,0x094d, 0x200c,
3179        /* test double danda */
3180        0x965,
3181        /* test ASCII */
3182        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3183        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3184        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3185        /* tests from Lotus */
3186        0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3187        0x0930,0x094D,0x200D,
3188        0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3189        0x0915,0x0921,0x002B,0x095F,
3190        /* tamil range */
3191        0x0B86, 0xB87, 0xB88,
3192        /* telugu range */
3193        0x0C05, 0x0C02, 0x0C03,0x0c31,
3194        /* kannada range */
3195        0x0C85, 0xC82, 0x0C83,
3196        /* test Abbr sign and Anudatta */
3197        0x0970, 0x952,
3198       /* 0x0958,
3199        0x0959,
3200        0x095A,
3201        0x095B,
3202        0x095C,
3203        0x095D,
3204        0x095E,
3205        0x095F,*/
3206        0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3207        0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3208        0x090C ,
3209        0x0962,
3210        0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3211        0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3212        0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3213        0x093D /* Avagraha  0xEA, 0xE9*/,
3214        0x0958,
3215        0x0959,
3216        0x095A,
3217        0x095B,
3218        0x095C,
3219        0x095D,
3220        0x095E,
3221        0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3222      };
3223    static const unsigned char byteArr[]={
3224
3225        0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3226        0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3227        0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3228        0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3229        0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3230        0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3231        0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3232        0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3233        0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3234        0xf8,0xf9,0xfa,
3235        /* test soft halant */
3236        0xb3, 0xE8, 0xE9,
3237        /* test explicit halant */
3238        0xb3, 0xE8, 0xE8,
3239        /* test double danda */
3240        0xea, 0xea,
3241        /* test ASCII */
3242        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3243        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3244        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3245        /* test ATR code */
3246
3247        /* tests from Lotus */
3248        0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3249        0xEF,0x42,0xCF,0xE8,0xD9,
3250        0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3251        0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3252        /* tamil range */
3253        0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3254        /* telugu range */
3255        0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3256        /* kannada range */
3257        0xEF, 0x48,0xa4, 0xa2, 0xa3,
3258        /* anudatta and abbreviation sign */
3259        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3260
3261
3262        0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3263
3264        0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3265
3266        0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3267
3268        0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3269
3270        0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3271
3272        0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3273
3274        0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3275
3276        0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3277
3278        0xB3, 0xE9, /* Ka + NUKTA */
3279
3280        0xB4, 0xE9, /* Kha + NUKTA */
3281
3282        0xB5, 0xE9, /* Ga + NUKTA */
3283
3284        0xBA, 0xE9,
3285
3286        0xBF, 0xE9,
3287
3288        0xC0, 0xE9,
3289
3290        0xC9, 0xE9,
3291        /* INV halant RA    */
3292        0xD9, 0xE8, 0xCF,
3293        0x00, 0x00A0,
3294        /* just consume unhandled codepoints */
3295        0xEF, 0x30,
3296
3297    };
3298    testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3299    TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3300
3301}
3302
3303static void
3304TestISO_2022_JP() {
3305    /* test input */
3306    static const uint16_t in[]={
3307        0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3308        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3309        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3310        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3311        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3312        0x201D, 0x3014, 0x000D, 0x000A,
3313        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3314        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3315        };
3316    const UChar* uSource;
3317    const UChar* uSourceLimit;
3318    const char* cSource;
3319    const char* cSourceLimit;
3320    UChar *uTargetLimit =NULL;
3321    UChar *uTarget;
3322    char *cTarget;
3323    const char *cTargetLimit;
3324    char *cBuf;
3325    UChar *uBuf,*test;
3326    int32_t uBufSize = 120;
3327    UErrorCode errorCode=U_ZERO_ERROR;
3328    UConverter *cnv;
3329    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3330    int32_t* myOff= offsets;
3331    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3332    if(U_FAILURE(errorCode)) {
3333        log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3334        return;
3335    }
3336
3337    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3338    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3339    uSource = (const UChar*)in;
3340    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3341    cTarget = cBuf;
3342    cTargetLimit = cBuf +uBufSize*5;
3343    uTarget = uBuf;
3344    uTargetLimit = uBuf+ uBufSize*5;
3345    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3346    if(U_FAILURE(errorCode)){
3347        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3348        return;
3349    }
3350    cSource = cBuf;
3351    cSourceLimit =cTarget;
3352    test =uBuf;
3353    myOff=offsets;
3354    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3355    if(U_FAILURE(errorCode)){
3356        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3357        return;
3358    }
3359
3360    uSource = (const UChar*)in;
3361    while(uSource<uSourceLimit){
3362        if(*test!=*uSource){
3363
3364            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3365        }
3366        uSource++;
3367        test++;
3368    }
3369
3370    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3371    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3372    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3373    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3374    TestJitterbug930("csISO2022JP");
3375    ucnv_close(cnv);
3376    free(uBuf);
3377    free(cBuf);
3378    free(offsets);
3379}
3380
3381static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3382    const UChar* uSource;
3383    const UChar* uSourceLimit;
3384    const char* cSource;
3385    const char* cSourceLimit;
3386    UChar *uTargetLimit =NULL;
3387    UChar *uTarget;
3388    char *cTarget;
3389    const char *cTargetLimit;
3390    char *cBuf;
3391    UChar *uBuf,*test;
3392    int32_t uBufSize = 120*10;
3393    UErrorCode errorCode=U_ZERO_ERROR;
3394    UConverter *cnv;
3395    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3396    int32_t* myOff= offsets;
3397    cnv=my_ucnv_open(conv, &errorCode);
3398    if(U_FAILURE(errorCode)) {
3399        log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3400        return;
3401    }
3402
3403    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3404    cBuf =(char*)malloc(uBufSize * sizeof(char));
3405    uSource = (const UChar*)in;
3406    uSourceLimit=uSource+len;
3407    cTarget = cBuf;
3408    cTargetLimit = cBuf +uBufSize;
3409    uTarget = uBuf;
3410    uTargetLimit = uBuf+ uBufSize;
3411    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3412    if(U_FAILURE(errorCode)){
3413        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3414        return;
3415    }
3416    /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3417    cSource = cBuf;
3418    cSourceLimit =cTarget;
3419    test =uBuf;
3420    myOff=offsets;
3421    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3422    if(U_FAILURE(errorCode)){
3423        log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3424        return;
3425    }
3426
3427    uSource = (const UChar*)in;
3428    while(uSource<uSourceLimit){
3429        if(*test!=*uSource){
3430            log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3431        }
3432        uSource++;
3433        test++;
3434    }
3435    TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3436    TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3437    TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3438    if(byteArr && byteArrLen!=0){
3439        TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3440        TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3441        {
3442            cSource = byteArr;
3443            cSourceLimit = cSource+byteArrLen;
3444            test=uBuf;
3445            myOff = offsets;
3446            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3447            if(U_FAILURE(errorCode)){
3448                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3449                return;
3450            }
3451
3452            uSource = (const UChar*)in;
3453            while(uSource<uSourceLimit){
3454                if(*test!=*uSource){
3455                    log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3456                }
3457                uSource++;
3458                test++;
3459            }
3460        }
3461    }
3462
3463    ucnv_close(cnv);
3464    free(uBuf);
3465    free(cBuf);
3466    free(offsets);
3467}
3468static UChar U_CALLCONV
3469_charAt(int32_t offset, void *context) {
3470    return ((char*)context)[offset];
3471}
3472
3473static int32_t
3474unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3475    int32_t srcIndex=0;
3476    int32_t dstIndex=0;
3477    if(U_FAILURE(*status)){
3478        return 0;
3479    }
3480    if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3481        *status = U_ILLEGAL_ARGUMENT_ERROR;
3482        return 0;
3483    }
3484    if(srcLen==-1){
3485        srcLen = (int32_t)uprv_strlen(src);
3486    }
3487
3488    for (; srcIndex<srcLen; ) {
3489        UChar32 c = src[srcIndex++];
3490        if (c == 0x005C /*'\\'*/) {
3491            c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3492            if (c == (UChar32)0xFFFFFFFF) {
3493                *status=U_INVALID_CHAR_FOUND; /* return empty string */
3494                break; /* invalid escape sequence */
3495            }
3496        }
3497        if(dstIndex < dstLen){
3498            if(c>0xFFFF){
3499               dst[dstIndex++] = UTF16_LEAD(c);
3500               if(dstIndex<dstLen){
3501                    dst[dstIndex]=UTF16_TRAIL(c);
3502               }else{
3503                   *status=U_BUFFER_OVERFLOW_ERROR;
3504               }
3505            }else{
3506                dst[dstIndex]=(UChar)c;
3507            }
3508
3509        }else{
3510            *status = U_BUFFER_OVERFLOW_ERROR;
3511        }
3512        dstIndex++; /* for preflighting */
3513    }
3514    return dstIndex;
3515}
3516
3517static void
3518TestFullRoundtrip(const char* cp){
3519    UChar usource[10] ={0};
3520    UChar nsrc[10] = {0};
3521    uint32_t i=1;
3522    int len=0, ulen;
3523    nsrc[0]=0x0061;
3524    /* Test codepoint 0 */
3525    TestConv(usource,1,cp,"",NULL,0);
3526    TestConv(usource,2,cp,"",NULL,0);
3527    nsrc[2]=0x5555;
3528    TestConv(nsrc,3,cp,"",NULL,0);
3529
3530    for(;i<=0x10FFFF;i++){
3531        if(i==0xD800){
3532            i=0xDFFF;
3533            continue;
3534        }
3535        if(i<=0xFFFF){
3536            usource[0] =(UChar) i;
3537            len=1;
3538        }else{
3539            usource[0]=UTF16_LEAD(i);
3540            usource[1]=UTF16_TRAIL(i);
3541            len=2;
3542        }
3543        ulen=len;
3544        if(i==0x80) {
3545            usource[2]=0;
3546        }
3547        /* Test only single code points */
3548        TestConv(usource,ulen,cp,"",NULL,0);
3549        /* Test codepoint repeated twice */
3550        usource[ulen]=usource[0];
3551        usource[ulen+1]=usource[1];
3552        ulen+=len;
3553        TestConv(usource,ulen,cp,"",NULL,0);
3554        /* Test codepoint repeated 3 times */
3555        usource[ulen]=usource[0];
3556        usource[ulen+1]=usource[1];
3557        ulen+=len;
3558        TestConv(usource,ulen,cp,"",NULL,0);
3559        /* Test codepoint in between 2 codepoints */
3560        nsrc[1]=usource[0];
3561        nsrc[2]=usource[1];
3562        nsrc[len+1]=0x5555;
3563        TestConv(nsrc,len+2,cp,"",NULL,0);
3564        uprv_memset(usource,0,sizeof(UChar)*10);
3565    }
3566}
3567
3568static void
3569TestRoundTrippingAllUTF(void){
3570    if(!getTestOption(QUICK_OPTION)){
3571        log_verbose("Running exhaustive round trip test for BOCU-1\n");
3572        TestFullRoundtrip("BOCU-1");
3573        log_verbose("Running exhaustive round trip test for SCSU\n");
3574        TestFullRoundtrip("SCSU");
3575        log_verbose("Running exhaustive round trip test for UTF-8\n");
3576        TestFullRoundtrip("UTF-8");
3577        log_verbose("Running exhaustive round trip test for CESU-8\n");
3578        TestFullRoundtrip("CESU-8");
3579        log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3580        TestFullRoundtrip("UTF-16BE");
3581        log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3582        TestFullRoundtrip("UTF-16LE");
3583        log_verbose("Running exhaustive round trip test for UTF-16\n");
3584        TestFullRoundtrip("UTF-16");
3585        log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3586        TestFullRoundtrip("UTF-32BE");
3587        log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3588        TestFullRoundtrip("UTF-32LE");
3589        log_verbose("Running exhaustive round trip test for UTF-32\n");
3590        TestFullRoundtrip("UTF-32");
3591        log_verbose("Running exhaustive round trip test for UTF-7\n");
3592        TestFullRoundtrip("UTF-7");
3593        log_verbose("Running exhaustive round trip test for UTF-7\n");
3594        TestFullRoundtrip("UTF-7,version=1");
3595        log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3596        TestFullRoundtrip("IMAP-mailbox-name");
3597        /*
3598         *
3599         * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3600         * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3601         * The old mappings remain as fallbacks.
3602         * This test may be reintroduced at a later time.
3603         *
3604         * 110118 - mow
3605         */
3606         /*
3607         log_verbose("Running exhaustive round trip test for GB18030\n");
3608         TestFullRoundtrip("GB18030");
3609         */
3610    }
3611}
3612
3613static void
3614TestSCSU() {
3615
3616    static const uint16_t germanUTF16[]={
3617        0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3618    };
3619
3620    static const uint8_t germanSCSU[]={
3621        0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3622    };
3623
3624    static const uint16_t russianUTF16[]={
3625        0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3626    };
3627
3628    static const uint8_t russianSCSU[]={
3629        0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3630    };
3631
3632    static const uint16_t japaneseUTF16[]={
3633        0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3634        0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3635        0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3636        0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3637        0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3638        0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3639        0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3640        0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3641        0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3642        0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3643        0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3644        0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3645        0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3646        0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3647        0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3648    };
3649
3650    /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3651     it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3652    static const uint8_t japaneseSCSU[]={
3653        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3654        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3655        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3656        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3657        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3658        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3659        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3660        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3661        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3662        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3663        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3664        0xcb, 0x82
3665    };
3666
3667    static const uint16_t allFeaturesUTF16[]={
3668        0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3669        0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3670        0x01df, 0xf000, 0xdbff, 0xdfff
3671    };
3672
3673    /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3674     * result here (34B vs. 35B)
3675     */
3676    static const uint8_t allFeaturesSCSU[]={
3677        0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3678        0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3679        0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3680        0xdf, 0x14, 0x80, 0x15, 0xff
3681    };
3682    static const uint16_t monkeyIn[]={
3683        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3684        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3685        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3686        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3687        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3688        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3689        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3690        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3691        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3692        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3693        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3694        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3695        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3696        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3697        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3698        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3699        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3700        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3701        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3702        /* test non-BMP code points */
3703        0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3704        0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3705        0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3706        0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3707        0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3708        0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3709        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3710        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3711        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3712        0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3713        0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3714
3715
3716        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3717        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3718        0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3719        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3720        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3721    };
3722    static const char *fTestCases [] = {
3723          "\\ud800\\udc00", /* smallest surrogate*/
3724          "\\ud8ff\\udcff",
3725          "\\udBff\\udFff", /* largest surrogate pair*/
3726          "\\ud834\\udc00",
3727          "\\U0010FFFF",
3728          "Hello \\u9292 \\u9192 World!",
3729          "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3730          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3731
3732          "\\u0648\\u06c8", /* catch missing reset*/
3733          "\\u0648\\u06c8",
3734
3735          "\\u4444\\uE001", /* lowest quotable*/
3736          "\\u4444\\uf2FF", /* highest quotable*/
3737          "\\u4444\\uf188\\u4444",
3738          "\\u4444\\uf188\\uf288",
3739          "\\u4444\\uf188abc\\u0429\\uf288",
3740          "\\u9292\\u2222",
3741          "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3742          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3743          "Hello World!123456",
3744          "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3745
3746          "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3747          "abc\\u4411d",      /* uses SQU*/
3748          "abc\\u4411\\u4412d",/* uses SCU*/
3749          "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3750          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3751          "\\u9292\\u2222",
3752          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3753          "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3754          "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3755
3756          "", /* empty input*/
3757          "\\u0000", /* smallest BMP character*/
3758          "\\uFFFF", /* largest BMP character*/
3759
3760          /* regression tests*/
3761          "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3762          "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3763          "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3764          "\\u0041\\u00df\\u0401\\u015f",
3765          "\\u9066\\u2123abc",
3766          "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3767          "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3768    };
3769    int i=0;
3770    for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3771        const char* cSrc = fTestCases[i];
3772        UErrorCode status = U_ZERO_ERROR;
3773        int32_t cSrcLen,srcLen;
3774        UChar* src;
3775        /* UConverter* cnv = ucnv_open("SCSU",&status); */
3776        cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3777        src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3778        srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3779        log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3780        TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3781        free(src);
3782    }
3783    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3784    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3785    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3786    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3787    TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3788    TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3789    TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3790}
3791
3792#if !UCONFIG_NO_LEGACY_CONVERSION
3793static void TestJitterbug2346(){
3794    char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3795                      0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3796    uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3797
3798    UChar uTarget[500]={'\0'};
3799    UChar* utarget=uTarget;
3800    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3801
3802    char cTarget[500]={'\0'};
3803    char* ctarget=cTarget;
3804    char* ctargetLimit=cTarget+sizeof(cTarget);
3805    const char* csource=source;
3806    UChar* temp = expected;
3807    UErrorCode err=U_ZERO_ERROR;
3808
3809    UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3810    if(U_FAILURE(err)) {
3811        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3812        return;
3813    }
3814    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3815    if(U_FAILURE(err)) {
3816        log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3817        return;
3818    }
3819    utargetLimit=utarget;
3820    utarget = uTarget;
3821    while(utarget<utargetLimit){
3822        if(*temp!=*utarget){
3823
3824            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3825        }
3826        utarget++;
3827        temp++;
3828    }
3829    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3830    if(U_FAILURE(err)) {
3831        log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3832        return;
3833    }
3834    ctargetLimit=ctarget;
3835    ctarget =cTarget;
3836    ucnv_close(conv);
3837
3838
3839}
3840
3841static void
3842TestISO_2022_JP_1() {
3843    /* test input */
3844    static const uint16_t in[]={
3845        0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3846        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3847        0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3848        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3849        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3850        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3851        0x201D, 0x000D, 0x000A,
3852        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3853        0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3854        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3855        0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3856        0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3857        0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3858      };
3859    const UChar* uSource;
3860    const UChar* uSourceLimit;
3861    const char* cSource;
3862    const char* cSourceLimit;
3863    UChar *uTargetLimit =NULL;
3864    UChar *uTarget;
3865    char *cTarget;
3866    const char *cTargetLimit;
3867    char *cBuf;
3868    UChar *uBuf,*test;
3869    int32_t uBufSize = 120;
3870    UErrorCode errorCode=U_ZERO_ERROR;
3871    UConverter *cnv;
3872
3873    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3874    if(U_FAILURE(errorCode)) {
3875        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3876        return;
3877    }
3878
3879    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3880    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3881    uSource = (const UChar*)in;
3882    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3883    cTarget = cBuf;
3884    cTargetLimit = cBuf +uBufSize*5;
3885    uTarget = uBuf;
3886    uTargetLimit = uBuf+ uBufSize*5;
3887    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3888    if(U_FAILURE(errorCode)){
3889        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3890        return;
3891    }
3892    cSource = cBuf;
3893    cSourceLimit =cTarget;
3894    test =uBuf;
3895    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3896    if(U_FAILURE(errorCode)){
3897        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3898        return;
3899    }
3900    uSource = (const UChar*)in;
3901    while(uSource<uSourceLimit){
3902        if(*test!=*uSource){
3903
3904            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3905        }
3906        uSource++;
3907        test++;
3908    }
3909    /*ucnv_close(cnv);
3910    cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3911    /*Test for the condition where there is an invalid character*/
3912    ucnv_reset(cnv);
3913    {
3914        static const uint8_t source2[]={0x0e,0x24,0x053};
3915        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3916    }
3917    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3918    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3919    ucnv_close(cnv);
3920    free(uBuf);
3921    free(cBuf);
3922}
3923
3924static void
3925TestISO_2022_JP_2() {
3926    /* test input */
3927    static const uint16_t in[]={
3928        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3929        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3930        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3931        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3932        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3933        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3934        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3935        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3936        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3937        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3938        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3939        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3940        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3941        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3942        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3943        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3944        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3945        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3946        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3947      };
3948    const UChar* uSource;
3949    const UChar* uSourceLimit;
3950    const char* cSource;
3951    const char* cSourceLimit;
3952    UChar *uTargetLimit =NULL;
3953    UChar *uTarget;
3954    char *cTarget;
3955    const char *cTargetLimit;
3956    char *cBuf;
3957    UChar *uBuf,*test;
3958    int32_t uBufSize = 120;
3959    UErrorCode errorCode=U_ZERO_ERROR;
3960    UConverter *cnv;
3961    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3962    int32_t* myOff= offsets;
3963    cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3964    if(U_FAILURE(errorCode)) {
3965        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3966        return;
3967    }
3968
3969    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3970    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3971    uSource = (const UChar*)in;
3972    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3973    cTarget = cBuf;
3974    cTargetLimit = cBuf +uBufSize*5;
3975    uTarget = uBuf;
3976    uTargetLimit = uBuf+ uBufSize*5;
3977    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3978    if(U_FAILURE(errorCode)){
3979        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3980        return;
3981    }
3982    cSource = cBuf;
3983    cSourceLimit =cTarget;
3984    test =uBuf;
3985    myOff=offsets;
3986    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3987    if(U_FAILURE(errorCode)){
3988        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3989        return;
3990    }
3991    uSource = (const UChar*)in;
3992    while(uSource<uSourceLimit){
3993        if(*test!=*uSource){
3994
3995            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3996        }
3997        uSource++;
3998        test++;
3999    }
4000    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4001    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4002    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4003    /*Test for the condition where there is an invalid character*/
4004    ucnv_reset(cnv);
4005    {
4006        static const uint8_t source2[]={0x0e,0x24,0x053};
4007        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4008    }
4009    ucnv_close(cnv);
4010    free(uBuf);
4011    free(cBuf);
4012    free(offsets);
4013}
4014
4015static void
4016TestISO_2022_KR() {
4017    /* test input */
4018    static const uint16_t in[]={
4019                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4020                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4021                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4022                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4023                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4024                   ,0x53E3,0x53E4,0x000A,0x000D};
4025    const UChar* uSource;
4026    const UChar* uSourceLimit;
4027    const char* cSource;
4028    const char* cSourceLimit;
4029    UChar *uTargetLimit =NULL;
4030    UChar *uTarget;
4031    char *cTarget;
4032    const char *cTargetLimit;
4033    char *cBuf;
4034    UChar *uBuf,*test;
4035    int32_t uBufSize = 120;
4036    UErrorCode errorCode=U_ZERO_ERROR;
4037    UConverter *cnv;
4038    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4039    int32_t* myOff= offsets;
4040    cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4041    if(U_FAILURE(errorCode)) {
4042        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4043        return;
4044    }
4045
4046    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4047    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4048    uSource = (const UChar*)in;
4049    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4050    cTarget = cBuf;
4051    cTargetLimit = cBuf +uBufSize*5;
4052    uTarget = uBuf;
4053    uTargetLimit = uBuf+ uBufSize*5;
4054    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4055    if(U_FAILURE(errorCode)){
4056        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4057        return;
4058    }
4059    cSource = cBuf;
4060    cSourceLimit =cTarget;
4061    test =uBuf;
4062    myOff=offsets;
4063    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4064    if(U_FAILURE(errorCode)){
4065        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4066        return;
4067    }
4068    uSource = (const UChar*)in;
4069    while(uSource<uSourceLimit){
4070        if(*test!=*uSource){
4071            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4072        }
4073        uSource++;
4074        test++;
4075    }
4076    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4077    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4078    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4079    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4080    TestJitterbug930("csISO2022KR");
4081    /*Test for the condition where there is an invalid character*/
4082    ucnv_reset(cnv);
4083    {
4084        static const uint8_t source2[]={0x1b,0x24,0x053};
4085        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4086        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4087    }
4088    ucnv_close(cnv);
4089    free(uBuf);
4090    free(cBuf);
4091    free(offsets);
4092}
4093
4094static void
4095TestISO_2022_KR_1() {
4096    /* test input */
4097    static const uint16_t in[]={
4098                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4099                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4100                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4101                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4102                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4103                   ,0x53E3,0x53E4,0x000A,0x000D};
4104    const UChar* uSource;
4105    const UChar* uSourceLimit;
4106    const char* cSource;
4107    const char* cSourceLimit;
4108    UChar *uTargetLimit =NULL;
4109    UChar *uTarget;
4110    char *cTarget;
4111    const char *cTargetLimit;
4112    char *cBuf;
4113    UChar *uBuf,*test;
4114    int32_t uBufSize = 120;
4115    UErrorCode errorCode=U_ZERO_ERROR;
4116    UConverter *cnv;
4117    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4118    int32_t* myOff= offsets;
4119    cnv=ucnv_open("ibm-25546", &errorCode);
4120    if(U_FAILURE(errorCode)) {
4121        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4122        return;
4123    }
4124
4125    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4126    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4127    uSource = (const UChar*)in;
4128    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4129    cTarget = cBuf;
4130    cTargetLimit = cBuf +uBufSize*5;
4131    uTarget = uBuf;
4132    uTargetLimit = uBuf+ uBufSize*5;
4133    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4134    if(U_FAILURE(errorCode)){
4135        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4136        return;
4137    }
4138    cSource = cBuf;
4139    cSourceLimit =cTarget;
4140    test =uBuf;
4141    myOff=offsets;
4142    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4143    if(U_FAILURE(errorCode)){
4144        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4145        return;
4146    }
4147    uSource = (const UChar*)in;
4148    while(uSource<uSourceLimit){
4149        if(*test!=*uSource){
4150            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4151        }
4152        uSource++;
4153        test++;
4154    }
4155    ucnv_reset(cnv);
4156    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4157    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4158    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4159    ucnv_reset(cnv);
4160    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4161        /*Test for the condition where there is an invalid character*/
4162    ucnv_reset(cnv);
4163    {
4164        static const uint8_t source2[]={0x1b,0x24,0x053};
4165        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4166        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4167    }
4168    ucnv_close(cnv);
4169    free(uBuf);
4170    free(cBuf);
4171    free(offsets);
4172}
4173
4174static void TestJitterbug2411(){
4175    static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4176                         "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4177    UConverter* kr=NULL, *kr1=NULL;
4178    UErrorCode errorCode = U_ZERO_ERROR;
4179    UChar tgt[100]={'\0'};
4180    UChar* target = tgt;
4181    UChar* targetLimit = target+100;
4182    kr=ucnv_open("iso-2022-kr", &errorCode);
4183    if(U_FAILURE(errorCode)) {
4184        log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4185        return;
4186    }
4187    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4188    if(U_FAILURE(errorCode)) {
4189        log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4190        return;
4191    }
4192    kr1 = ucnv_open("ibm-25546", &errorCode);
4193    if(U_FAILURE(errorCode)) {
4194        log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4195        return;
4196    }
4197    target = tgt;
4198    targetLimit = target+100;
4199    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4200
4201    if(U_FAILURE(errorCode)) {
4202        log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4203        return;
4204    }
4205
4206    ucnv_close(kr);
4207    ucnv_close(kr1);
4208
4209}
4210
4211static void
4212TestJIS(){
4213    /* From Unicode moved to testdata/conversion.txt */
4214    /*To Unicode*/
4215    {
4216        static const uint8_t sampleTextJIS[] = {
4217            0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4218            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4219            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4220        };
4221        static const uint16_t expectedISO2022JIS[] = {
4222            0x0041, 0x0042,
4223            0xFF81, 0xFF82,
4224            0x3000
4225        };
4226        static const int32_t  toISO2022JISOffs[]={
4227            3,4,
4228            8,9,
4229            16
4230        };
4231
4232        static const uint8_t sampleTextJIS7[] = {
4233            0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4234            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4235            0x1b,0x24,0x42,0x21,0x21,
4236            0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4237            0x21,0x22,
4238            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4239        };
4240        static const uint16_t expectedISO2022JIS7[] = {
4241            0x0041, 0x0042,
4242            0xFF81, 0xFF82,
4243            0x3000,
4244            0xFF81, 0xFF82,
4245            0x3001,
4246            0x3000
4247        };
4248        static const int32_t  toISO2022JIS7Offs[]={
4249            3,4,
4250            8,9,
4251            13,16,
4252            17,
4253            19,27
4254        };
4255        static const uint8_t sampleTextJIS8[] = {
4256            0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4257            0xa1,0xc8,0xd9,/*Katakana Set*/
4258            0x1b,0x28,0x42,
4259            0x41,0x42,
4260            0xb1,0xc3, /*Katakana Set*/
4261            0x1b,0x24,0x42,0x21,0x21
4262        };
4263        static const uint16_t expectedISO2022JIS8[] = {
4264            0x0041, 0x0042,
4265            0xff61, 0xff88, 0xff99,
4266            0x0041, 0x0042,
4267            0xff71, 0xff83,
4268            0x3000
4269        };
4270        static const int32_t  toISO2022JIS8Offs[]={
4271            3, 4,  5,  6,
4272            7, 11, 12, 13,
4273            14, 18,
4274        };
4275
4276        testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4277            sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4278        testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4279            sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4280        testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4281            sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4282    }
4283
4284}
4285
4286
4287#if 0
4288 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4289
/*
 * Currently compiled out: this function sits inside an "#if 0" region.
 *
 * Round-trips the byte sequence cSource[] through the converter opened as
 * "ISO_2022_CN_EXT": bytes -> UTF-16 with ucnv_toUnicode(), then
 * UTF-16 -> bytes with ucnv_fromUnicode(), and logs an error for every
 * regenerated byte that differs from the byte at the same index of cSource[].
 */
static void TestJitterbug915(){
/* tests for roundtripping of the below sequence
\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
*/
    static const char cSource[]={
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
        0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
        0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
        0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
        0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
        0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
        0x37, 0x20, 0x2A, 0x2F
    };
    /* UTF-16 scratch buffer; the limit below divides the byte size by 2,
       i.e. it assumes sizeof(UChar) == 2. */
    UChar uTarget[500]={'\0'};
    UChar* utarget=uTarget;
    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;

    /* Byte buffer that receives the re-encoded text. */
    char cTarget[500]={'\0'};
    char* ctarget=cTarget;
    char* ctargetLimit=cTarget+sizeof(cTarget);
    const char* csource=cSource;
    const char* tempSrc = cSource;
    UErrorCode err=U_ZERO_ERROR;

    UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
    if(U_FAILURE(err)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
        return;
    }
    /* bytes -> UTF-16 */
    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
    if(U_FAILURE(err)) {
        /* NOTE(review): this return (and the one below) skips ucnv_close(conv). */
        log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
        return;
    }
    /* utarget marks the end of the produced text; rewind it to re-encode from the start. */
    utargetLimit=utarget;
    utarget = uTarget;
    /* UTF-16 -> bytes */
    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
    if(U_FAILURE(err)) {
        log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
        return;
    }
    /* Compare the regenerated bytes with the original sequence, byte by byte. */
    ctargetLimit=ctarget;
    ctarget =cTarget;
    while(ctarget<ctargetLimit){
        if(*ctarget != *tempSrc){
            /* NOTE(review): the regenerated byte (*ctarget) is printed under "Expected"
               and the original byte (*tempSrc) under "Got" - the labels look swapped. */
            log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
        }
        ++ctarget;
        ++tempSrc;
    }

    ucnv_close(conv);
}
4370
/*
 * Currently compiled out: this function sits inside an "#if 0" region.
 *
 * Round-trip test for the converter opened as "ISO_2022,locale=cn,version=1".
 * The UTF-16 sample in[] (which starts with surrogate pairs) is converted to
 * bytes with ucnv_fromUnicode(), converted back with ucnv_toUnicode(), and
 * compared unit by unit with the input. The input is then run through
 * TestSmallTargetBuffer() and TestSmallSourceBuffer(), and a 3-byte sequence
 * is passed to TestNextUCharError() with U_ZERO_ERROR as the expected status.
 */
static void
TestISO_2022_CN_EXT() {
    /* test input */
    static const uint16_t in[]={
                /* test Non-BMP code points */
         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
         0xD869, 0xDED5,

         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
         0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
         0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
         0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
         0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
         0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,

         0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A

      };

    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf,*test;
    int32_t uBufSize = 180;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;
    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    int32_t* myOff= offsets;
    cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
    if(U_FAILURE(errorCode)) {
        /* NOTE(review): offsets is not freed on this return. */
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    /* cBuf is allocated with a factor of 10, but cTargetLimit below only exposes uBufSize*5 bytes. */
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    uSource = (const UChar*)in;
    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;
    /* Unicode -> bytes */
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        /* NOTE(review): this return (and the one after ucnv_toUnicode) skips the cleanup at the end. */
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        return;
    }
    /* bytes -> Unicode; cTarget now marks the end of the bytes just produced */
    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        return;
    }
    /* Compare the round-tripped text with the original, unit by unit. */
    uSource = (const UChar*)in;
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        else{
            log_verbose("      Got: \\u%04X\n",(int)*test) ;
        }
        uSource++;
        test++;
    }
    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
    /*Test for the condition where there is an invalid character*/
    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x0e,0x24,0x053};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
    }
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
4477#endif
4478
4479static void
4480TestISO_2022_CN() {
4481    /* test input */
4482    static const uint16_t in[]={
4483         /* jitterbug 951 */
4484         0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4485         0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4486         0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4487         0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4488         0x0020, 0x0045, 0x004e, 0x0044,
4489         /**/
4490         0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4491         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4492         0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4493         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4494         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4495         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4496         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4497         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4498         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4499         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4500         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4501         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4502         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4503         0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4504         0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4505         0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4506         0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4507
4508      };
4509    const UChar* uSource;
4510    const UChar* uSourceLimit;
4511    const char* cSource;
4512    const char* cSourceLimit;
4513    UChar *uTargetLimit =NULL;
4514    UChar *uTarget;
4515    char *cTarget;
4516    const char *cTargetLimit;
4517    char *cBuf;
4518    UChar *uBuf,*test;
4519    int32_t uBufSize = 180;
4520    UErrorCode errorCode=U_ZERO_ERROR;
4521    UConverter *cnv;
4522    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4523    int32_t* myOff= offsets;
4524    cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4525    if(U_FAILURE(errorCode)) {
4526        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4527        return;
4528    }
4529
4530    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4531    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4532    uSource = (const UChar*)in;
4533    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4534    cTarget = cBuf;
4535    cTargetLimit = cBuf +uBufSize*5;
4536    uTarget = uBuf;
4537    uTargetLimit = uBuf+ uBufSize*5;
4538    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4539    if(U_FAILURE(errorCode)){
4540        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4541        return;
4542    }
4543    cSource = cBuf;
4544    cSourceLimit =cTarget;
4545    test =uBuf;
4546    myOff=offsets;
4547    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4548    if(U_FAILURE(errorCode)){
4549        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4550        return;
4551    }
4552    uSource = (const UChar*)in;
4553    while(uSource<uSourceLimit){
4554        if(*test!=*uSource){
4555            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4556        }
4557        else{
4558            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4559        }
4560        uSource++;
4561        test++;
4562    }
4563    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4564    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4565    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4566    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4567    TestJitterbug930("csISO2022CN");
4568    /*Test for the condition where there is an invalid character*/
4569    ucnv_reset(cnv);
4570    {
4571        static const uint8_t source2[]={0x0e,0x24,0x053};
4572        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4573    }
4574
4575    ucnv_close(cnv);
4576    free(uBuf);
4577    free(cBuf);
4578    free(offsets);
4579}
4580
4581/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
/* One empty-segment test case: which converter to open and the raw bytes to feed it. */
typedef struct {
    const char *    converterName;      /* name handed to ucnv_open(); NULL marks the end of a table */
    const char *    inputText;          /* bytes to convert to Unicode; length is given separately */
    int             inputTextLength;    /* number of bytes in inputText */
} EmptySegmentTest;
4587
4588/* Callback for TestJitterbug6175, should only get called for empty segment errors */
4589static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4590                                             int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4591    if (reason > UCNV_IRREGULAR) {
4592        return;
4593    }
4594    if (reason != UCNV_IRREGULAR) {
4595        log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4596    }
4597    /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4598    *err = U_ZERO_ERROR;
4599    ucnv_cbToUWriteSub(toArgs,0,err);
4600}
4601
4602enum { kEmptySegmentToUCharsMax = 64 };
4603static void TestJitterbug6175(void) {
4604    static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4605    static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4606    static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4607    static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4608    static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4609    static const EmptySegmentTest emptySegmentTests[] = {
4610        /* converterName inputText    inputTextLength */
4611        { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4612        { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4613        { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4614        { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4615        { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4616        /* terminator: */
4617        { NULL,          NULL,        0,                  }
4618    };
4619    const EmptySegmentTest * testPtr;
4620    for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4621        UErrorCode   err = U_ZERO_ERROR;
4622        UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4623        if (U_FAILURE(err)) {
4624            log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4625            return;
4626        }
4627        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4628        if (U_FAILURE(err)) {
4629            log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4630            ucnv_close(cnv);
4631            return;
4632        }
4633        {
4634            UChar         toUChars[kEmptySegmentToUCharsMax];
4635            UChar *       toUCharsPtr = toUChars;
4636            const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4637            const char *  inCharsPtr = testPtr->inputText;
4638            const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4639            ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4640        }
4641        ucnv_close(cnv);
4642    }
4643}
4644
4645static void
4646TestEBCDIC_STATEFUL() {
4647    /* test input */
4648    static const uint8_t in[]={
4649        0x61,
4650        0x1a,
4651        0x0f, 0x4b,
4652        0x42,
4653        0x40,
4654        0x36,
4655    };
4656
4657    /* expected test results */
4658    static const int32_t results[]={
4659        /* number of bytes read, code point */
4660        1, 0x002f,
4661        1, 0x0092,
4662        2, 0x002e,
4663        1, 0xff62,
4664        1, 0x0020,
4665        1, 0x0096,
4666
4667    };
4668    static const uint8_t in2[]={
4669        0x0f,
4670        0xa1,
4671        0x01
4672    };
4673
4674    /* expected test results */
4675    static const int32_t results2[]={
4676        /* number of bytes read, code point */
4677        2, 0x203E,
4678        1, 0x0001,
4679    };
4680
4681    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4682    UErrorCode errorCode=U_ZERO_ERROR;
4683    UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4684    if(U_FAILURE(errorCode)) {
4685        log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4686        return;
4687    }
4688    TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4689    ucnv_reset(cnv);
4690     /* Test the condition when source >= sourceLimit */
4691    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4692    ucnv_reset(cnv);
4693    /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4694    {
4695        static const uint8_t source1[]={0x0f};
4696        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4697    }
4698    /*Test for the condition where there is an invalid character*/
4699    ucnv_reset(cnv);
4700    {
4701        static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4702        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4703    }
4704    ucnv_reset(cnv);
4705    source=(const char*)in2;
4706    limit=(const char*)in2+sizeof(in2);
4707    TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4708    ucnv_close(cnv);
4709
4710}
4711
4712static void
4713TestGB18030() {
4714    /* test input */
4715    static const uint8_t in[]={
4716        0x24,
4717        0x7f,
4718        0x81, 0x30, 0x81, 0x30,
4719        0xa8, 0xbf,
4720        0xa2, 0xe3,
4721        0xd2, 0xbb,
4722        0x82, 0x35, 0x8f, 0x33,
4723        0x84, 0x31, 0xa4, 0x39,
4724        0x90, 0x30, 0x81, 0x30,
4725        0xe3, 0x32, 0x9a, 0x35
4726#if 0
4727        /*
4728         * Feature removed   markus 2000-oct-26
4729         * Only some codepages must match surrogate pairs into supplementary code points -
4730         * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4731         * GB 18030 provides direct encodings for supplementary code points, therefore
4732         * it must not combine two single-encoded surrogates into one code point.
4733         */
4734        0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4735#endif
4736    };
4737
4738    /* expected test results */
4739    static const int32_t results[]={
4740        /* number of bytes read, code point */
4741        1, 0x24,
4742        1, 0x7f,
4743        4, 0x80,
4744        2, 0x1f9,
4745        2, 0x20ac,
4746        2, 0x4e00,
4747        4, 0x9fa6,
4748        4, 0xffff,
4749        4, 0x10000,
4750        4, 0x10ffff
4751#if 0
4752        /* Feature removed. See comment above. */
4753        8, 0x10000
4754#endif
4755    };
4756
4757/*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4758    UErrorCode errorCode=U_ZERO_ERROR;
4759    UConverter *cnv=ucnv_open("gb18030", &errorCode);
4760    if(U_FAILURE(errorCode)) {
4761        log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4762        return;
4763    }
4764    TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4765    ucnv_close(cnv);
4766}
4767
4768static void
4769TestLMBCS() {
4770    /* LMBCS-1 string */
4771    static const uint8_t pszLMBCS[]={
4772        0x61,
4773        0x01, 0x29,
4774        0x81,
4775        0xA0,
4776        0x0F, 0x27,
4777        0x0F, 0x91,
4778        0x14, 0x0a, 0x74,
4779        0x14, 0xF6, 0x02,
4780        0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4781        0x10, 0x88, 0xA0,
4782    };
4783
4784    /* Unicode UChar32 equivalents */
4785    static const UChar32 pszUnicode32[]={
4786        /* code point */
4787        0x00000061,
4788        0x00002013,
4789        0x000000FC,
4790        0x000000E1,
4791        0x00000007,
4792        0x00000091,
4793        0x00000a74,
4794        0x00000200,
4795        0x00023456, /* code point for surrogate pair */
4796        0x00005516
4797    };
4798
4799/* Unicode UChar equivalents */
4800    static const UChar pszUnicode[]={
4801        /* code point */
4802        0x0061,
4803        0x2013,
4804        0x00FC,
4805        0x00E1,
4806        0x0007,
4807        0x0091,
4808        0x0a74,
4809        0x0200,
4810        0xD84D, /* low surrogate */
4811        0xDC56, /* high surrogate */
4812        0x5516
4813    };
4814
4815/* expected test results */
4816    static const int offsets32[]={
4817        /* number of bytes read, code point */
4818        0,
4819        1,
4820        3,
4821        4,
4822        5,
4823        7,
4824        9,
4825        12,
4826        15,
4827        21,
4828        24
4829    };
4830
4831/* expected test results */
4832    static const int offsets[]={
4833        /* number of bytes read, code point */
4834        0,
4835        1,
4836        3,
4837        4,
4838        5,
4839        7,
4840        9,
4841        12,
4842        15,
4843        18,
4844        21,
4845        24
4846    };
4847
4848
4849    UConverter *cnv;
4850
4851#define NAME_LMBCS_1 "LMBCS-1"
4852#define NAME_LMBCS_2 "LMBCS-2"
4853
4854
4855   /* Some basic open/close/property tests on some LMBCS converters */
4856    {
4857
4858      char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4859      char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4860      char get_subchars [1];
4861      const char * get_name;
4862      UConverter *cnv1;
4863      UConverter *cnv2;
4864
4865      int8_t len = sizeof(get_subchars);
4866
4867      UErrorCode errorCode=U_ZERO_ERROR;
4868
4869      /* Open */
4870      cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4871      if(U_FAILURE(errorCode)) {
4872         log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4873         return;
4874      }
4875      cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4876      if(U_FAILURE(errorCode)) {
4877         log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4878         return;
4879      }
4880
4881      /* Name */
4882      get_name = ucnv_getName (cnv1, &errorCode);
4883      if (strcmp(NAME_LMBCS_1,get_name)){
4884         log_err("Unexpected converter name: %s\n", get_name);
4885      }
4886      get_name = ucnv_getName (cnv2, &errorCode);
4887      if (strcmp(NAME_LMBCS_2,get_name)){
4888         log_err("Unexpected converter name: %s\n", get_name);
4889      }
4890
4891      /* substitution chars */
4892      ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4893      if(U_FAILURE(errorCode)) {
4894         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4895      }
4896      if (len!=1){
4897         log_err("Unexpected length of sub chars\n");
4898      }
4899      if (get_subchars[0] != expected_subchars[0]){
4900           log_err("Unexpected value of sub chars\n");
4901      }
4902      ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4903      if(U_FAILURE(errorCode)) {
4904         log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4905      }
4906      ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4907      if(U_FAILURE(errorCode)) {
4908         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4909      }
4910      if (len!=1){
4911         log_err("Unexpected length of sub chars\n");
4912      }
4913      if (get_subchars[0] != new_subchars[0]){
4914           log_err("Unexpected value of sub chars\n");
4915      }
4916      ucnv_close(cnv1);
4917      ucnv_close(cnv2);
4918
4919    }
4920
4921    /* LMBCS to Unicode - offsets */
4922    {
4923       UErrorCode errorCode=U_ZERO_ERROR;
4924
4925       const char * pSource = (const char *)pszLMBCS;
4926       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4927
4928       UChar Out [sizeof(pszUnicode) + 1];
4929       UChar * pOut = Out;
4930       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4931
4932       int32_t off [sizeof(offsets)];
4933
4934      /* last 'offset' in expected results is just the final size.
4935         (Makes other tests easier). Compensate here: */
4936
4937       off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4938
4939
4940
4941      cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4942      if(U_FAILURE(errorCode)) {
4943           log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4944           return;
4945      }
4946
4947
4948
4949      ucnv_toUnicode (cnv,
4950                      &pOut,
4951                      OutLimit,
4952                      &pSource,
4953                      sourceLimit,
4954                      off,
4955                      TRUE,
4956                      &errorCode);
4957
4958
4959       if (memcmp(off,offsets,sizeof(offsets)))
4960       {
4961         log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4962       }
4963       if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4964       {
4965         log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4966       }
4967       ucnv_close(cnv);
4968    }
4969    {
4970   /* LMBCS to Unicode - getNextUChar */
4971      const char * sourceStart;
4972      const char *source=(const char *)pszLMBCS;
4973      const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4974      const UChar32 *results= pszUnicode32;
4975      const int *off = offsets32;
4976
4977      UErrorCode errorCode=U_ZERO_ERROR;
4978      UChar32 uniChar;
4979
4980      cnv=ucnv_open("LMBCS-1", &errorCode);
4981      if(U_FAILURE(errorCode)) {
4982           log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4983           return;
4984      }
4985      else
4986      {
4987
4988         while(source<limit) {
4989            sourceStart=source;
4990            uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4991            if(U_FAILURE(errorCode)) {
4992                  log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4993                  break;
4994            } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4995               log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4996                   uniChar, (source-sourceStart), *results, *off);
4997               break;
4998            }
4999            results++;
5000            off++;
5001         }
5002       }
5003       ucnv_close(cnv);
5004    }
5005    { /* test locale & optimization group operations: Unicode to LMBCS */
5006
5007      UErrorCode errorCode=U_ZERO_ERROR;
5008      UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5009      UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5010      UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5011      UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5012      const UChar * pUniOut = uniString;
5013      UChar * pUniIn = uniString;
5014      uint8_t lmbcsString [4];
5015      const char * pLMBCSOut = (const char *)lmbcsString;
5016      char * pLMBCSIn = (char *)lmbcsString;
5017
5018      /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5019      ucnv_fromUnicode (cnv16he,
5020                        &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
5021                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
5022                        NULL, 1, &errorCode);
5023
5024      if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5025      {
5026         log_err("LMBCS-16,locale=he gives unexpected translation\n");
5027      }
5028
5029      pLMBCSIn= (char *)lmbcsString;
5030      pUniOut = uniString;
5031      ucnv_fromUnicode (cnv01us,
5032                        &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
5033                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
5034                        NULL, 1, &errorCode);
5035
5036      if (lmbcsString[0] != 0x9F)
5037      {
5038         log_err("LMBCS-1,locale=US gives unexpected translation\n");
5039      }
5040
5041      /* single byte char from mbcs char set */
5042      lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5043      pLMBCSOut = (const char *)lmbcsString;
5044      pUniIn = uniString;
5045      ucnv_toUnicode (cnv16jp,
5046                        &pUniIn, pUniIn + 1,
5047                        &pLMBCSOut, (pLMBCSOut + 1),
5048                        NULL, 1, &errorCode);
5049      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5050      {
5051           log_err("Unexpected results from LMBCS-16 single byte char\n");
5052      }
5053      /* convert to group 1: should be 3 bytes */
5054      pLMBCSIn = (char *)lmbcsString;
5055      pUniOut = uniString;
5056      ucnv_fromUnicode (cnv01us,
5057                        &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5058                        &pUniOut, pUniOut + 1,
5059                        NULL, 1, &errorCode);
5060      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5061         || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5062      {
5063           log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5064      }
5065      pLMBCSOut = (const char *)lmbcsString;
5066      pUniIn = uniString;
5067      ucnv_toUnicode (cnv01us,
5068                        &pUniIn, pUniIn + 1,
5069                        &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5070                        NULL, 1, &errorCode);
5071      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5072      {
5073           log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5074      }
5075      pLMBCSIn = (char *)lmbcsString;
5076      pUniOut = uniString;
5077      ucnv_fromUnicode (cnv16jp,
5078                        &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5079                        &pUniOut, pUniOut + 1,
5080                        NULL, 1, &errorCode);
5081      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5082      {
5083           log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5084      }
5085      ucnv_close(cnv16he);
5086      ucnv_close(cnv16jp);
5087      ucnv_close(cnv01us);
5088    }
5089    {
5090       /* Small source buffer testing, LMBCS -> Unicode */
5091
5092       UErrorCode errorCode=U_ZERO_ERROR;
5093
5094       const char * pSource = (const char *)pszLMBCS;
5095       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5096       int codepointCount = 0;
5097
5098       UChar Out [sizeof(pszUnicode) + 1];
5099       UChar * pOut = Out;
5100       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
5101
5102
5103       cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5104       if(U_FAILURE(errorCode)) {
5105           log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5106           return;
5107       }
5108
5109
5110       while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5111       {
5112           ucnv_toUnicode (cnv,
5113               &pOut,
5114               OutLimit,
5115               &pSource,
5116               (pSource+1), /* claim that this is a 1- byte buffer */
5117               NULL,
5118               FALSE,    /* FALSE means there might be more chars in the next buffer */
5119               &errorCode);
5120
5121           if (U_SUCCESS (errorCode))
5122           {
5123               if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5124               {
5125                   /* we are on to the next code point: check value */
5126
5127                   if (Out[0] != pszUnicode[codepointCount]){
5128                       log_err("LMBCS->Uni result %lx should have been %lx \n",
5129                           Out[0], pszUnicode[codepointCount]);
5130                   }
5131
5132                   pOut = Out; /* reset for accumulating next code point */
5133                   codepointCount++;
5134               }
5135           }
5136           else
5137           {
5138               log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5139           }
5140       }
5141       {
5142         /* limits & surrogate error testing */
5143         char LIn [sizeof(pszLMBCS)];
5144         const char * pLIn = LIn;
5145
5146         char LOut [sizeof(pszLMBCS)];
5147         char * pLOut = LOut;
5148
5149         UChar UOut [sizeof(pszUnicode)];
5150         UChar * pUOut = UOut;
5151
5152         UChar UIn [sizeof(pszUnicode)];
5153         const UChar * pUIn = UIn;
5154
5155         int32_t off [sizeof(offsets)];
5156         UChar32 uniChar;
5157
5158         errorCode=U_ZERO_ERROR;
5159
5160         /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5161         pUIn++;
5162         ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5163         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5164         {
5165            log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5166         }
5167         pUIn--;
5168
5169         errorCode=U_ZERO_ERROR;
5170         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5171         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5172         {
5173            log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5174         }
5175         errorCode=U_ZERO_ERROR;
5176
5177         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5178         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5179         {
5180            log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5181         }
5182         errorCode=U_ZERO_ERROR;
5183
5184         /* 0 byte source request - no error, no pointer movement */
5185         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5186         ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5187         if(U_FAILURE(errorCode)) {
5188            log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5189         }
5190         if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5191         {
5192              log_err("Unexpected pointer move in 0 byte source request \n");
5193         }
5194         /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5195         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5196         if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5197         {
5198            log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5199         }
5200         if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5201         {
5202            log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5203         }
5204         errorCode = U_ZERO_ERROR;
5205
5206         /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5207
5208         pUIn = pszUnicode;
5209         ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
5210         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5211         {
5212            log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5213         }
5214
5215         errorCode = U_ZERO_ERROR;
5216
5217         pLIn = (const char *)pszLMBCS;
5218         ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5219         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5220         {
5221            log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5222         }
5223
5224         /* unpaired or chopped LMBCS surrogates */
5225
5226         /* OK high surrogate, Low surrogate is chopped */
5227         LIn [0] = (char)0x14;
5228         LIn [1] = (char)0xD8;
5229         LIn [2] = (char)0x01;
5230         LIn [3] = (char)0x14;
5231         LIn [4] = (char)0xDC;
5232         pLIn = LIn;
5233         errorCode = U_ZERO_ERROR;
5234         pUOut = UOut;
5235
5236         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5237         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5238         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5239         {
5240            log_err("Unexpected results on chopped low surrogate\n");
5241         }
5242
5243         /* chopped at surrogate boundary */
5244         LIn [0] = (char)0x14;
5245         LIn [1] = (char)0xD8;
5246         LIn [2] = (char)0x01;
5247         pLIn = LIn;
5248         errorCode = U_ZERO_ERROR;
5249         pUOut = UOut;
5250
5251         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5252         if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5253         {
5254            log_err("Unexpected results on chopped at surrogate boundary \n");
5255         }
5256
5257         /* unpaired surrogate plus valid Unichar */
5258         LIn [0] = (char)0x14;
5259         LIn [1] = (char)0xD8;
5260         LIn [2] = (char)0x01;
5261         LIn [3] = (char)0x14;
5262         LIn [4] = (char)0xC9;
5263         LIn [5] = (char)0xD0;
5264         pLIn = LIn;
5265         errorCode = U_ZERO_ERROR;
5266         pUOut = UOut;
5267
5268         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5269         if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5270         {
5271            log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5272         }
5273
5274      /* unpaired surrogate plus chopped Unichar */
5275         LIn [0] = (char)0x14;
5276         LIn [1] = (char)0xD8;
5277         LIn [2] = (char)0x01;
5278         LIn [3] = (char)0x14;
5279         LIn [4] = (char)0xC9;
5280
5281         pLIn = LIn;
5282         errorCode = U_ZERO_ERROR;
5283         pUOut = UOut;
5284
5285         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5286         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5287         {
5288            log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5289         }
5290
5291         /* unpaired surrogate plus valid non-Unichar */
5292         LIn [0] = (char)0x14;
5293         LIn [1] = (char)0xD8;
5294         LIn [2] = (char)0x01;
5295         LIn [3] = (char)0x0F;
5296         LIn [4] = (char)0x3B;
5297
5298         pLIn = LIn;
5299         errorCode = U_ZERO_ERROR;
5300         pUOut = UOut;
5301
5302         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5303         if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5304         {
5305            log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5306         }
5307
5308         /* unpaired surrogate plus chopped non-Unichar */
5309         LIn [0] = (char)0x14;
5310         LIn [1] = (char)0xD8;
5311         LIn [2] = (char)0x01;
5312         LIn [3] = (char)0x0F;
5313
5314         pLIn = LIn;
5315         errorCode = U_ZERO_ERROR;
5316         pUOut = UOut;
5317
5318         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5319
5320         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5321         {
5322            log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5323         }
5324       }
5325    }
5326   ucnv_close(cnv);  /* final cleanup */
5327}
5328
5329
5330static void TestJitterbug255()
5331{
5332    static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5333    const char *testBuffer = (const char *)testBytes;
5334    const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5335    UErrorCode status = U_ZERO_ERROR;
5336    /*UChar32 result;*/
5337    UConverter *cnv = 0;
5338
5339    cnv = ucnv_open("shift-jis", &status);
5340    if (U_FAILURE(status) || cnv == 0) {
5341        log_data_err("Failed to open the converter for SJIS.\n");
5342                return;
5343    }
5344    while (testBuffer != testEnd)
5345    {
5346        /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5347        if (U_FAILURE(status))
5348        {
5349            log_err("Failed to convert the next UChar for SJIS.\n");
5350            break;
5351        }
5352    }
5353    ucnv_close(cnv);
5354}
5355
5356static void TestEBCDICUS4XML()
5357{
5358    UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5359    static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5360    static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5361    static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5362    char target_x[] = {0x00, 0x00, 0x00, 0x00};
5363    UChar *unicodes = unicodes_x;
5364    const UChar *toUnicodeMaps = toUnicodeMaps_x;
5365    char *target = target_x;
5366    const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5367    UErrorCode status = U_ZERO_ERROR;
5368    UConverter *cnv = 0;
5369
5370    cnv = ucnv_open("ebcdic-xml-us", &status);
5371    if (U_FAILURE(status) || cnv == 0) {
5372        log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5373        return;
5374    }
5375    ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5376    if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5377        log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5378            u_errorName(status));
5379        printUSeqErr(unicodes_x, 3);
5380        printUSeqErr(toUnicodeMaps, 3);
5381    }
5382    status = U_ZERO_ERROR;
5383    ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5384    if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5385        log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5386            u_errorName(status));
5387        printSeqErr((const unsigned char*)target_x, 3);
5388        printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5389    }
5390    ucnv_close(cnv);
5391}
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5393
5394#if !UCONFIG_NO_COLLATION
5395
5396static void TestJitterbug981(){
5397    const UChar* rules;
5398    int32_t rules_length, target_cap, bytes_needed, buff_size;
5399    UErrorCode status = U_ZERO_ERROR;
5400    UConverter *utf8cnv;
5401    UCollator* myCollator;
5402    char *buff;
5403    int numNeeded=0;
5404    utf8cnv = ucnv_open ("utf8", &status);
5405    if(U_FAILURE(status)){
5406        log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5407        return;
5408    }
5409    myCollator = ucol_open("zh", &status);
5410    if(U_FAILURE(status)){
5411        log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5412        ucnv_close(utf8cnv);
5413        return;
5414    }
5415
5416    rules = ucol_getRules(myCollator, &rules_length);
5417    buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5418    buff = malloc(buff_size);
5419
5420    target_cap = 0;
5421    do {
5422        ucnv_reset(utf8cnv);
5423        status = U_ZERO_ERROR;
5424        if(target_cap >= buff_size) {
5425            log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5426            break;
5427        }
5428        bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5429            rules, rules_length, &status);
5430        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5431        if(numNeeded!=0 && numNeeded!= bytes_needed){
5432            log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5433            break;
5434        }
5435        numNeeded = bytes_needed;
5436    } while (status == U_BUFFER_OVERFLOW_ERROR);
5437    ucol_close(myCollator);
5438    ucnv_close(utf8cnv);
5439    free(buff);
5440}
5441
5442#endif
5443
5444static void TestJitterbug1293(){
5445    static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5446    char target[256];
5447    UErrorCode status = U_ZERO_ERROR;
5448    UConverter* conv=NULL;
5449    int32_t target_cap, bytes_needed, numNeeded = 0;
5450    conv = ucnv_open("shift-jis",&status);
5451    if(U_FAILURE(status)){
5452      log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5453      return;
5454    }
5455
5456    do{
5457        target_cap =0;
5458        bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5459        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5460        if(numNeeded!=0 && numNeeded!= bytes_needed){
5461          log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5462        }
5463        numNeeded = bytes_needed;
5464    } while (status == U_BUFFER_OVERFLOW_ERROR);
5465    if(U_FAILURE(status)){
5466      log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5467      return;
5468    }
5469    ucnv_close(conv);
5470}
5471static void TestJB5275_1(){
5472
5473    static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5474                                "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5475                                /* Switch script: */
5476                                "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5477                                "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5478                                "\xEF\x40\x3B\xB3\x0A";
5479    static const UChar expected[] ={
5480            0x003b, 0x0a15, 0x000a, /* Easy characters */
5481            0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5482            0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5483            0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5484            0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5485    };
5486
5487    UErrorCode status = U_ZERO_ERROR;
5488    UConverter* conv = ucnv_open("iscii-gur", &status);
5489    UChar dest[100] = {'\0'};
5490    UChar* target = dest;
5491    UChar* targetLimit = dest+100;
5492    const char* source = data;
5493    const char* sourceLimit = data+strlen(data);
5494    const UChar* exp = expected;
5495
5496    if (U_FAILURE(status)) {
5497        log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5498        return;
5499    }
5500
5501    log_verbose("Testing switching back to default script when new line is encountered.\n");
5502    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5503    if(U_FAILURE(status)){
5504        log_err("conversion failed: %s \n", u_errorName(status));
5505    }
5506    targetLimit = target;
5507    target = dest;
5508    printUSeq(target, targetLimit-target);
5509    while(target<targetLimit){
5510        if(*exp!=*target){
5511            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5512        }
5513        target++;
5514        exp++;
5515    }
5516    ucnv_close(conv);
5517}
5518
5519static void TestJB5275(){
5520    static const char* data =
5521    /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5522    /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5523    /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5524        "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5525        "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5526        "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5527        "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5528        "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5529        "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5530        /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5531    static const UChar expected[] ={
5532        0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5533        0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5534        0x0038, 0x0C95, 0x000A, /* Kannada test */
5535        0x0039, 0x0D15, 0x000A, /* Malayalam test */
5536        0x003A, 0x0A95, 0x000A, /* Gujarati test */
5537        0x003B, 0x0A15, 0x000A, /* Punjabi test */
5538    };
5539
5540    UErrorCode status = U_ZERO_ERROR;
5541    UConverter* conv = ucnv_open("iscii", &status);
5542    UChar dest[100] = {'\0'};
5543    UChar* target = dest;
5544    UChar* targetLimit = dest+100;
5545    const char* source = data;
5546    const char* sourceLimit = data+strlen(data);
5547    const UChar* exp = expected;
5548    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5549    if(U_FAILURE(status)){
5550        log_err("conversion failed: %s \n", u_errorName(status));
5551    }
5552    targetLimit = target;
5553    target = dest;
5554
5555    printUSeq(target, targetLimit-target);
5556
5557    while(target<targetLimit){
5558        if(*exp!=*target){
5559            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5560        }
5561        target++;
5562        exp++;
5563    }
5564    ucnv_close(conv);
5565}
5566
5567static void
5568TestIsFixedWidth() {
5569    UErrorCode status = U_ZERO_ERROR;
5570    UConverter *cnv = NULL;
5571    int32_t i;
5572
5573    const char *fixedWidth[] = {
5574            "US-ASCII",
5575            "UTF32",
5576            "ibm-5478_P100-1995"
5577    };
5578
5579    const char *notFixedWidth[] = {
5580            "GB18030",
5581            "UTF8",
5582            "windows-949-2000",
5583            "UTF16"
5584    };
5585
5586    for (i = 0; i < LENGTHOF(fixedWidth); i++) {
5587        cnv = ucnv_open(fixedWidth[i], &status);
5588        if (cnv == NULL || U_FAILURE(status)) {
5589            log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5590            continue;
5591        }
5592
5593        if (!ucnv_isFixedWidth(cnv, &status)) {
5594            log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5595        }
5596        ucnv_close(cnv);
5597    }
5598
5599    for (i = 0; i < LENGTHOF(notFixedWidth); i++) {
5600        cnv = ucnv_open(notFixedWidth[i], &status);
5601        if (cnv == NULL || U_FAILURE(status)) {
5602            log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5603            continue;
5604        }
5605
5606        if (ucnv_isFixedWidth(cnv, &status)) {
5607            log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5608        }
5609        ucnv_close(cnv);
5610    }
5611}
5612