1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2009, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File CCONVTST.C
9*
10* Modification History:
11*        Name                     Description
12*    Steven R. Loomis     7/8/1999      Adding input buffer test
13********************************************************************************
14*/
15#include <stdio.h>
16#include "cstring.h"
17#include "unicode/uloc.h"
18#include "unicode/ucnv.h"
19#include "unicode/ucnv_err.h"
20#include "unicode/ucnv_cb.h"
21#include "cintltst.h"
22#include "unicode/utypes.h"
23#include "unicode/ustring.h"
24#include "unicode/ucol.h"
25#include "cmemory.h"
26
27static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
28static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
29#if !UCONFIG_NO_COLLATION
30static void TestJitterbug981(void);
31#endif
32static void TestJitterbug1293(void);
33static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
34static void TestConverterTypesAndStarters(void);
35static void TestAmbiguous(void);
36static void TestSignatureDetection(void);
37static void TestUTF7(void);
38static void TestIMAP(void);
39static void TestUTF8(void);
40static void TestCESU8(void);
41static void TestUTF16(void);
42static void TestUTF16BE(void);
43static void TestUTF16LE(void);
44static void TestUTF32(void);
45static void TestUTF32BE(void);
46static void TestUTF32LE(void);
47static void TestLATIN1(void);
48
49#if !UCONFIG_NO_LEGACY_CONVERSION
50static void TestSBCS(void);
51static void TestDBCS(void);
52static void TestMBCS(void);
53
54#ifdef U_ENABLE_GENERIC_ISO_2022
55static void TestISO_2022(void);
56#endif
57
58static void TestISO_2022_JP(void);
59static void TestISO_2022_JP_1(void);
60static void TestISO_2022_JP_2(void);
61static void TestISO_2022_KR(void);
62static void TestISO_2022_KR_1(void);
63static void TestISO_2022_CN(void);
64static void TestISO_2022_CN_EXT(void);
65static void TestJIS(void);
66static void TestHZ(void);
67#endif
68
69static void TestSCSU(void);
70
71#if !UCONFIG_NO_LEGACY_CONVERSION
72static void TestEBCDIC_STATEFUL(void);
73static void TestGB18030(void);
74static void TestLMBCS(void);
75static void TestJitterbug255(void);
76static void TestEBCDICUS4XML(void);
77static void TestJitterbug915(void);
78static void TestISCII(void);
79
80static void TestCoverageMBCS(void);
81static void TestJitterbug2346(void);
82static void TestJitterbug2411(void);
83static void TestJB5275(void);
84static void TestJB5275_1(void);
85static void TestJitterbug6175(void);
86#endif
87
88static void TestRoundTrippingAllUTF(void);
89static void TestConv(const uint16_t in[],
90                     int len,
91                     const char* conv,
92                     const char* lang,
93                     char byteArr[],
94                     int byteArrLen);
95void addTestNewConvert(TestNode** root);
96
97/* open a converter, using test data if it begins with '@' */
98static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
99
100
/* Size, in code units, of the scratch output buffers used by the conversion
   helpers below; also the default for the two chunk sizes. */
#define NEW_MAX_BUFFER 999

/* Maximum number of input units / output units handed to a converter per call
   by testConvertFromU() and testConvertToU(). */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Description of the conversion currently being tested; filled in by
   setNuConvTestName() and appended to log messages. */
static char     gNuConvTestName[1024];

/* Smaller of x and y. Both arguments are parenthesized so that expressions
   containing lower-precedence operators expand correctly. Each argument is
   still evaluated more than once, so avoid side effects. */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
108
109static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
110{
111  if(cnv && cnv[0] == '@') {
112    return ucnv_openPackage(loadTestData(err), cnv+1, err);
113  } else {
114    return ucnv_open(cnv, err);
115  }
116}
117
/* Logs the first len bytes of a at verbose level, as "{0xNN 0xNN ... }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
126
127static void printUSeq(const UChar* a, int len)
128{
129    int i=0;
130    log_verbose("{U+");
131    while (i<len) log_verbose("0x%04x ", a[i++]);
132    log_verbose("}\n");
133}
134
/* Writes the first len bytes of a to stderr, as "{0xNN 0xNN ... }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
143
144static void printUSeqErr(const UChar* a, int len)
145{
146    int i=0;
147    fprintf(stderr, "{U+");
148    while (i<len)
149        fprintf(stderr, "0x%04x ", a[i++]);
150    fprintf(stderr,"}\n");
151}
152
153static void
154TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
155{
156     const char* s0;
157     const char* s=(char*)source;
158     const int32_t *r=results;
159     UErrorCode errorCode=U_ZERO_ERROR;
160     UChar32 c;
161
162     while(s<limit) {
163        s0=s;
164        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
165        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
166            break; /* no more significant input */
167        } else if(U_FAILURE(errorCode)) {
168            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
169            break;
170        } else if(
171            /* test the expected number of input bytes only if >=0 */
172            (*r>=0 && (int32_t)(s-s0)!=*r) ||
173            c!=*(r+1)
174        ) {
175            log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
176                message, c, (s-s0), *(r+1), *r);
177            break;
178        }
179        r+=2;
180    }
181}
182
183static void
184TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
185{
186     const char* s=(char*)source;
187     UErrorCode errorCode=U_ZERO_ERROR;
188     uint32_t c;
189     c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
190     if(errorCode != expected){
191        log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
192     }
193     if(c != 0xFFFD && c != 0xffff){
194        log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
195     }
196
197}
198
199static void TestInBufSizes(void)
200{
201  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
202#if 1
203  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
204  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
205  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
206  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
207  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
208  TestNewConvertWithBufferSizes(1,1);
209  TestNewConvertWithBufferSizes(2,3);
210  TestNewConvertWithBufferSizes(3,2);
211#endif
212}
213
214static void TestOutBufSizes(void)
215{
216#if 1
217  TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
218  TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
219  TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
220  TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
221  TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
222  TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
223
224#endif
225}
226
227
228void addTestNewConvert(TestNode** root)
229{
230   addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
231   addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
232   addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
233   addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
234   addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
235   addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
236   addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
237   addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
238
239   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
240   addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
241   addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
242   addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
243   addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
244   addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
245   addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
246   addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
247
248#if !UCONFIG_NO_LEGACY_CONVERSION
249   addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
250#endif
251
252   addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
253
254#if !UCONFIG_NO_LEGACY_CONVERSION
255   addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
256   addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
257   addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
258
259#ifdef U_ENABLE_GENERIC_ISO_2022
260   addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
261#endif
262
263   addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
264   addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
265   addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
266   addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
267   addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
268   addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
269   addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
270   addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
271   addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
272   addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
273#endif
274
275   addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
276
277#if !UCONFIG_NO_LEGACY_CONVERSION
278   addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
279   addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
280   addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
281   addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
282   addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
283   addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
284   addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
285#if !UCONFIG_NO_COLLATION
286   addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
287#endif
288
289   addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
290#endif
291
292
293#if !UCONFIG_NO_LEGACY_CONVERSION
294   addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
295#endif
296
297   addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
298
299#if !UCONFIG_NO_LEGACY_CONVERSION
300   addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
301   addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
302   addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
303#endif
304
305}
306
307
308/* Note that this test already makes use of statics, so it's not really
309   multithread safe.
310   This convenience function lets us make the error messages actually useful.
311*/
312
313static void setNuConvTestName(const char *codepage, const char *direction)
314{
315    sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
316        codepage,
317        direction,
318        (int)gInBufferSize,
319        (int)gOutBufferSize);
320}
321
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
    /* test was OK */
    TC_OK = 0,
    /* Match failed - err was printed */
    TC_MISMATCH,
    /* Test failed, don't print an err because it was already printed. */
    TC_FAIL
} ETestConvertResult;
328
329/* Note: This function uses global variables and it will not do offset
330checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
331static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
332                const char *codepage, const int32_t *expectOffsets , UBool useFallback)
333{
334    UErrorCode status = U_ZERO_ERROR;
335    UConverter *conv = 0;
336    char    junkout[NEW_MAX_BUFFER]; /* FIX */
337    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
338    char *p;
339    const UChar *src;
340    char *end;
341    char *targ;
342    int32_t *offs;
343    int i;
344    int32_t   realBufferSize;
345    char *realBufferEnd;
346    const UChar *realSourceEnd;
347    const UChar *sourceLimit;
348    UBool checkOffsets = TRUE;
349    UBool doFlush;
350
351    for(i=0;i<NEW_MAX_BUFFER;i++)
352        junkout[i] = (char)0xF0;
353    for(i=0;i<NEW_MAX_BUFFER;i++)
354        junokout[i] = 0xFF;
355
356    setNuConvTestName(codepage, "FROM");
357
358    log_verbose("\n=========  %s\n", gNuConvTestName);
359
360    conv = my_ucnv_open(codepage, &status);
361
362    if(U_FAILURE(status))
363    {
364        log_data_err("Couldn't open converter %s\n",codepage);
365        return TC_FAIL;
366    }
367    if(useFallback){
368        ucnv_setFallback(conv,useFallback);
369    }
370
371    log_verbose("Converter opened..\n");
372
373    src = source;
374    targ = junkout;
375    offs = junokout;
376
377    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
378    realBufferEnd = junkout + realBufferSize;
379    realSourceEnd = source + sourceLen;
380
381    if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
382        checkOffsets = FALSE;
383
384    do
385    {
386      end = nct_min(targ + gOutBufferSize, realBufferEnd);
387      sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
388
389      doFlush = (UBool)(sourceLimit == realSourceEnd);
390
391      if(targ == realBufferEnd) {
392        log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
393        return TC_FAIL;
394      }
395      log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
396
397
398      status = U_ZERO_ERROR;
399
400      ucnv_fromUnicode (conv,
401                        &targ,
402                        end,
403                        &src,
404                        sourceLimit,
405                        checkOffsets ? offs : NULL,
406                        doFlush, /* flush if we're at the end of the input data */
407                        &status);
408    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
409
410    if(U_FAILURE(status)) {
411      log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
412      return TC_FAIL;
413    }
414
415    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
416                sourceLen, targ-junkout);
417
418    if(VERBOSITY)
419    {
420      char junk[9999];
421      char offset_str[9999];
422      char *ptr;
423
424      junk[0] = 0;
425      offset_str[0] = 0;
426      for(ptr = junkout;ptr<targ;ptr++) {
427        sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
428        sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
429      }
430
431      log_verbose(junk);
432      printSeq((const uint8_t *)expect, expectLen);
433      if ( checkOffsets ) {
434        log_verbose("\nOffsets:");
435        log_verbose(offset_str);
436      }
437      log_verbose("\n");
438    }
439    ucnv_close(conv);
440
441    if(expectLen != targ-junkout) {
442      log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
443      log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
444      printf("\nGot:");
445      printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
446      printf("\nExpected:");
447      printSeqErr((const unsigned char*)expect, expectLen);
448      return TC_MISMATCH;
449    }
450
451    if (checkOffsets && (expectOffsets != 0) ) {
452      log_verbose("comparing %d offsets..\n", targ-junkout);
453      if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
454        log_err("did not get the expected offsets. %s\n", gNuConvTestName);
455        printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
456        log_err("\n");
457        log_err("Got  :     ");
458        for(p=junkout;p<targ;p++) {
459          log_err("%d,", junokout[p-junkout]);
460        }
461        log_err("\n");
462        log_err("Expected:  ");
463        for(i=0; i<(targ-junkout); i++) {
464          log_err("%d,", expectOffsets[i]);
465        }
466        log_err("\n");
467      }
468    }
469
470    log_verbose("comparing..\n");
471    if(!memcmp(junkout, expect, expectLen)) {
472      log_verbose("Matches!\n");
473      return TC_OK;
474    } else {
475      log_err("String does not match u->%s\n", gNuConvTestName);
476      printUSeqErr(source, sourceLen);
477      printf("\nGot:");
478      printSeqErr((const unsigned char *)junkout, expectLen);
479      printf("\nExpected:");
480      printSeqErr((const unsigned char *)expect, expectLen);
481
482      return TC_MISMATCH;
483    }
484}
485
486/* Note: This function uses global variables and it will not do offset
487checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
488static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
489                                          const char *codepage, const int32_t *expectOffsets, UBool useFallback)
490{
491    UErrorCode status = U_ZERO_ERROR;
492    UConverter *conv = 0;
493    UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
494    int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
495    const char *src;
496    const char *realSourceEnd;
497    const char *srcLimit;
498    UChar *p;
499    UChar *targ;
500    UChar *end;
501    int32_t *offs;
502    int i;
503    UBool   checkOffsets = TRUE;
504
505    int32_t   realBufferSize;
506    UChar *realBufferEnd;
507
508
509    for(i=0;i<NEW_MAX_BUFFER;i++)
510        junkout[i] = 0xFFFE;
511
512    for(i=0;i<NEW_MAX_BUFFER;i++)
513        junokout[i] = -1;
514
515    setNuConvTestName(codepage, "TO");
516
517    log_verbose("\n=========  %s\n", gNuConvTestName);
518
519    conv = my_ucnv_open(codepage, &status);
520
521    if(U_FAILURE(status))
522    {
523        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
524        return TC_FAIL;
525    }
526    if(useFallback){
527        ucnv_setFallback(conv,useFallback);
528    }
529    log_verbose("Converter opened..\n");
530
531    src = (const char *)source;
532    targ = junkout;
533    offs = junokout;
534
535    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
536    realBufferEnd = junkout + realBufferSize;
537    realSourceEnd = src + sourcelen;
538
539    if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
540        checkOffsets = FALSE;
541
542    do
543    {
544        end = nct_min( targ + gOutBufferSize, realBufferEnd);
545        srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
546
547        if(targ == realBufferEnd)
548        {
549            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
550            return TC_FAIL;
551        }
552        log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
553
554        /* oldTarg = targ; */
555
556        status = U_ZERO_ERROR;
557
558        ucnv_toUnicode (conv,
559                &targ,
560                end,
561                &src,
562                srcLimit,
563                checkOffsets ? offs : NULL,
564                (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
565                &status);
566
567        /*        offs += (targ-oldTarg); */
568
569      } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
570
571    if(U_FAILURE(status))
572    {
573        log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
574        return TC_FAIL;
575    }
576
577    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
578        sourcelen, targ-junkout);
579    if(VERBOSITY)
580    {
581        char junk[9999];
582        char offset_str[9999];
583        UChar *ptr;
584
585        junk[0] = 0;
586        offset_str[0] = 0;
587
588        for(ptr = junkout;ptr<targ;ptr++)
589        {
590            sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
591            sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
592        }
593
594        log_verbose(junk);
595        printUSeq(expect, expectlen);
596        if ( checkOffsets )
597          {
598            log_verbose("\nOffsets:");
599            log_verbose(offset_str);
600          }
601        log_verbose("\n");
602    }
603    ucnv_close(conv);
604
605    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
606
607    if (checkOffsets && (expectOffsets != 0))
608    {
609        if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
610            log_err("did not get the expected offsets. %s\n",gNuConvTestName);
611            log_err("Got:      ");
612            for(p=junkout;p<targ;p++) {
613                log_err("%d,", junokout[p-junkout]);
614            }
615            log_err("\n");
616            log_err("Expected: ");
617            for(i=0; i<(targ-junkout); i++) {
618                log_err("%d,", expectOffsets[i]);
619            }
620            log_err("\n");
621            log_err("output:   ");
622            for(i=0; i<(targ-junkout); i++) {
623                log_err("%X,", junkout[i]);
624            }
625            log_err("\n");
626            log_err("input:    ");
627            for(i=0; i<(src-(const char *)source); i++) {
628                log_err("%X,", (unsigned char)source[i]);
629            }
630            log_err("\n");
631        }
632    }
633
634    if(!memcmp(junkout, expect, expectlen*2))
635    {
636        log_verbose("Matches!\n");
637        return TC_OK;
638    }
639    else
640    {
641        log_err("String does not match. %s\n", gNuConvTestName);
642        log_verbose("String does not match. %s\n", gNuConvTestName);
643        printf("\nGot:");
644        printUSeqErr(junkout, expectlen);
645        printf("\nExpected:");
646        printUSeqErr(expect, expectlen);
647        return TC_MISMATCH;
648    }
649}
650
651
652static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
653{
654/** test chars #1 */
655    /*  1 2 3  1Han 2Han 3Han .  */
656    static const UChar   sampleText[] =
657     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
658    static const UChar sampleTextRoundTripUnmappable[] =
659    { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
660
661
662    static const uint8_t expectedUTF8[] =
663     { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
664    static const int32_t toUTF8Offs[] =
665     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
666    static const int32_t fmUTF8Offs[] =
667     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
668
669#ifdef U_ENABLE_GENERIC_ISO_2022
670    /* Same as UTF8, but with ^[%B preceeding */
671    static const const uint8_t expectedISO2022[] =
672     { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
673    static const int32_t toISO2022Offs[]     =
674     { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
675       0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
676    static const int32_t fmISO2022Offs[] =
677     { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
678#endif
679
680    /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
681    static const uint8_t expectedIBM930[] =
682     { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
683    static const int32_t toIBM930Offs[] =
684     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
685    static const int32_t fmIBM930Offs[] =
686     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
687
688    /* 1 2 3 0 h1 h2 h3 . MBCS*/
689    static const uint8_t expectedIBM943[] =
690     {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
691    static const int32_t toIBM943Offs    [] =
692     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
693    static const int32_t fmIBM943Offs[] =
694     { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
695
696    /* 1 2 3 0 h1 h2 h3 . DBCS*/
697    static const uint8_t expectedIBM9027[] =
698     {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
699    static const int32_t toIBM9027Offs    [] =
700     {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
701
702     /* 1 2 3 0 <?> <?> <?> . SBCS*/
703    static const uint8_t expectedIBM920[] =
704     {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
705    static const int32_t toIBM920Offs    [] =
706     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
707
708    /* 1 2 3 0 <?> <?> <?> . SBCS*/
709    static const uint8_t expectedISO88593[] =
710     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
711    static const int32_t toISO88593Offs[]     =
712     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
713
714    /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
715    static const uint8_t expectedLATIN1[] =
716     { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
717    static const int32_t toLATIN1Offs[]     =
718     { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
719
720
721    /*  etc */
722    static const uint8_t expectedUTF16BE[] =
723     { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
724    static const int32_t toUTF16BEOffs[]=
725     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
726    static const int32_t fmUTF16BEOffs[] =
727     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
728
729    static const uint8_t expectedUTF16LE[] =
730     { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
731    static const int32_t toUTF16LEOffs[]=
732     { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
733    static const int32_t fmUTF16LEOffs[] =
734     { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
735
736    static const uint8_t expectedUTF32BE[] =
737     { 0x00, 0x00, 0x00, 0x31,
738       0x00, 0x00, 0x00, 0x32,
739       0x00, 0x00, 0x00, 0x33,
740       0x00, 0x00, 0x00, 0x00,
741       0x00, 0x00, 0x4e, 0x00,
742       0x00, 0x00, 0x4e, 0x8c,
743       0x00, 0x00, 0x4e, 0x09,
744       0x00, 0x00, 0x00, 0x2e,
745       0x00, 0x02, 0x00, 0x21 };
746    static const int32_t toUTF32BEOffs[]=
747     { 0x00, 0x00, 0x00, 0x00,
748       0x01, 0x01, 0x01, 0x01,
749       0x02, 0x02, 0x02, 0x02,
750       0x03, 0x03, 0x03, 0x03,
751       0x04, 0x04, 0x04, 0x04,
752       0x05, 0x05, 0x05, 0x05,
753       0x06, 0x06, 0x06, 0x06,
754       0x07, 0x07, 0x07, 0x07,
755       0x08, 0x08, 0x08, 0x08,
756       0x08, 0x08, 0x08, 0x08 };
757    static const int32_t fmUTF32BEOffs[] =
758     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
759
760    static const uint8_t expectedUTF32LE[] =
761     { 0x31, 0x00, 0x00, 0x00,
762       0x32, 0x00, 0x00, 0x00,
763       0x33, 0x00, 0x00, 0x00,
764       0x00, 0x00, 0x00, 0x00,
765       0x00, 0x4e, 0x00, 0x00,
766       0x8c, 0x4e, 0x00, 0x00,
767       0x09, 0x4e, 0x00, 0x00,
768       0x2e, 0x00, 0x00, 0x00,
769       0x21, 0x00, 0x02, 0x00 };
770    static const int32_t toUTF32LEOffs[]=
771     { 0x00, 0x00, 0x00, 0x00,
772       0x01, 0x01, 0x01, 0x01,
773       0x02, 0x02, 0x02, 0x02,
774       0x03, 0x03, 0x03, 0x03,
775       0x04, 0x04, 0x04, 0x04,
776       0x05, 0x05, 0x05, 0x05,
777       0x06, 0x06, 0x06, 0x06,
778       0x07, 0x07, 0x07, 0x07,
779       0x08, 0x08, 0x08, 0x08,
780       0x08, 0x08, 0x08, 0x08 };
781    static const int32_t fmUTF32LEOffs[] =
782     { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
783
784
785
786
787/** Test chars #2 **/
788
789    /* Sahha [health],  slashed h's */
790    static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
791    static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
792
793    /* LMBCS */
794    static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
795    static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
796    static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
797    static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
798    /*********************************** START OF CODE finally *************/
799
800    gInBufferSize = insize;
801    gOutBufferSize = outsize;
802
803    log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
804
805
806    /*UTF-8*/
807    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
808        expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
809
810    log_verbose("Test surrogate behaviour for UTF8\n");
811    {
812        static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
813        static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
814                           0xf0, 0x90, 0x90, 0x81,
815                           0xef, 0xbf, 0xbd
816        };
817        static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
818        testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
819                         expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
820
821
822    }
823
824#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
825    /*ISO-2022*/
826    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
827        expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
828#endif
829
830    /*UTF16 LE*/
831    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
832        expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
833    /*UTF16 BE*/
834    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
835        expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
836    /*UTF32 LE*/
837    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
838        expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
839    /*UTF32 BE*/
840    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
841        expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
842
843    /*LATIN_1*/
844    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
845        expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
846
847#if !UCONFIG_NO_LEGACY_CONVERSION
848    /*EBCDIC_STATEFUL*/
849    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
850        expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
851
852    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
853        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
854
855    /*MBCS*/
856
857    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
858        expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
859    /*DBCS*/
860    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
861        expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
862    /*SBCS*/
863    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
864        expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
865    /*SBCS*/
866    testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
867        expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
868#endif
869
870
871/****/
872
873    /*UTF-8*/
874    testConvertToU(expectedUTF8, sizeof(expectedUTF8),
875        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
876#if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
877    /*ISO-2022*/
878    testConvertToU(expectedISO2022, sizeof(expectedISO2022),
879        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
880#endif
881
882    /*UTF16 LE*/
883    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
884        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
885    /*UTF16 BE*/
886    testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
887        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
888    /*UTF32 LE*/
889    testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
890        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
891    /*UTF32 BE*/
892    testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
893        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
894
895#if !UCONFIG_NO_LEGACY_CONVERSION
896    /*EBCDIC_STATEFUL*/
897    testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
898            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE);
899    /*MBCS*/
900    testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
901            sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE);
902#endif
903
904    /* Try it again to make sure it still works */
905    testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
906        sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
907
908#if !UCONFIG_NO_LEGACY_CONVERSION
909    testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
910        malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
911
912    testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
913        expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
914
915    /*LMBCS*/
916    testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
917        expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
918    testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
919        LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
920#endif
921
922    /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
923    {
924        /* encode directly set D and set O */
925        static const uint8_t utf7[] = {
926            /*
927                Hi Mom -+Jjo--!
928                A+ImIDkQ.
929                +-
930                +ZeVnLIqe
931            */
932            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
933            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
934            0x2b, 0x2d,
935            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
936        };
937        static const UChar unicode[] = {
938            /*
939                Hi Mom -<WHITE SMILING FACE>-!
940                A<NOT IDENTICAL TO><ALPHA>.
941                +
942                [Japanese word "nihongo"]
943            */
944            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
945            0x41, 0x2262, 0x0391, 0x2e,
946            0x2b,
947            0x65e5, 0x672c, 0x8a9e
948        };
949        static const int32_t toUnicodeOffsets[] = {
950            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
951            15, 17, 19, 23,
952            24,
953            27, 29, 32
954        };
955        static const int32_t fromUnicodeOffsets[] = {
956            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
957            11, 12, 12, 12, 13, 13, 13, 13, 14,
958            15, 15,
959            16, 16, 16, 17, 17, 17, 18, 18, 18
960        };
961
962        /* same but escaping set O (the exclamation mark) */
963        static const uint8_t utf7Restricted[] = {
964            /*
965                Hi Mom -+Jjo--+ACE-
966                A+ImIDkQ.
967                +-
968                +ZeVnLIqe
969            */
970            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
971            0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
972            0x2b, 0x2d,
973            0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
974        };
975        static const int32_t toUnicodeOffsetsR[] = {
976            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
977            19, 21, 23, 27,
978            28,
979            31, 33, 36
980        };
981        static const int32_t fromUnicodeOffsetsR[] = {
982            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
983            11, 12, 12, 12, 13, 13, 13, 13, 14,
984            15, 15,
985            16, 16, 16, 17, 17, 17, 18, 18, 18
986        };
987
988        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
989
990        testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
991
992        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
993
994        testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
995    }
996
997    /*
998     * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
999     * modified according to RFC 2060,
1000     * and supplemented with the one example in RFC 2060 itself.
1001     */
1002    {
1003        static const uint8_t imap[] = {
1004            /*  Hi Mom -&Jjo--!
1005                A&ImIDkQ-.
1006                &-
1007                &ZeVnLIqe-
1008                \
1009                ~peter
1010                /mail
1011                /&ZeVnLIqe-
1012                /&U,BTFw-
1013            */
1014            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1015            0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1016            0x26, 0x2d,
1017            0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1018            0x5c,
1019            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1020            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1021            0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1022            0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1023        };
1024        static const UChar unicode[] = {
1025            /*  Hi Mom -<WHITE SMILING FACE>-!
1026                A<NOT IDENTICAL TO><ALPHA>.
1027                &
1028                [Japanese word "nihongo"]
1029                \
1030                ~peter
1031                /mail
1032                /<65e5, 672c, 8a9e>
1033                /<53f0, 5317>
1034            */
1035            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1036            0x41, 0x2262, 0x0391, 0x2e,
1037            0x26,
1038            0x65e5, 0x672c, 0x8a9e,
1039            0x5c,
1040            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1041            0x2f, 0x6d, 0x61, 0x69, 0x6c,
1042            0x2f, 0x65e5, 0x672c, 0x8a9e,
1043            0x2f, 0x53f0, 0x5317
1044        };
1045        static const int32_t toUnicodeOffsets[] = {
1046            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1047            15, 17, 19, 24,
1048            25,
1049            28, 30, 33,
1050            37,
1051            38, 39, 40, 41, 42, 43,
1052            44, 45, 46, 47, 48,
1053            49, 51, 53, 56,
1054            60, 62, 64
1055        };
1056        static const int32_t fromUnicodeOffsets[] = {
1057            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1058            11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1059            15, 15,
1060            16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1061            19,
1062            20, 21, 22, 23, 24, 25,
1063            26, 27, 28, 29, 30,
1064            31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1065            35, 36, 36, 36, 37, 37, 37, 37, 37
1066        };
1067
1068        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1069
1070        testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1071    }
1072
1073    /* Test UTF-8 bad data handling*/
1074    {
1075        static const uint8_t utf8[]={
1076            0x61,
1077            0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1078            0x00,
1079            0x62,
1080            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1081            0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1082            0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1083            0xdf, 0xbf,                     /* 7ff */
1084            0xbf,                           /* truncated tail */
1085            0xf4, 0x90, 0x80, 0x80,         /* 11FFFF */
1086            0x02
1087        };
1088
1089        static const uint16_t utf8Expected[]={
1090            0x0061,
1091            0xfffd,
1092            0x0000,
1093            0x0062,
1094            0xfffd,
1095            0xfffd,
1096            0xdbff, 0xdfff,
1097            0x07ff,
1098            0xfffd,
1099            0xfffd,
1100            0x0002
1101        };
1102
1103        static const int32_t utf8Offsets[]={
1104            0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1105        };
1106        testConvertToU(utf8, sizeof(utf8),
1107                       utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1108
1109    }
1110
1111    /* Test UTF-32BE bad data handling*/
1112    {
1113        static const uint8_t utf32[]={
1114            0x00, 0x00, 0x00, 0x61,
1115            0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1116            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1117            0x00, 0x00, 0x00, 0x62,
1118            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1119            0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1120            0x00, 0x00, 0x01, 0x62,
1121            0x00, 0x00, 0x02, 0x62
1122        };
1123        static const uint16_t utf32Expected[]={
1124            0x0061,
1125            0xfffd,         /* 0x110000 out of range */
1126            0xDBFF,         /* 0x10FFFF in range */
1127            0xDFFF,
1128            0x0062,
1129            0xfffd,         /* 0xffffffff out of range */
1130            0xfffd,         /* 0x7fffffff out of range */
1131            0x0162,
1132            0x0262
1133        };
1134        static const int32_t utf32Offsets[]={
1135            0, 4, 8, 8, 12, 16, 20, 24, 28
1136        };
1137        static const uint8_t utf32ExpectedBack[]={
1138            0x00, 0x00, 0x00, 0x61,
1139            0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1140            0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1141            0x00, 0x00, 0x00, 0x62,
1142            0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1143            0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1144            0x00, 0x00, 0x01, 0x62,
1145            0x00, 0x00, 0x02, 0x62
1146        };
1147        static const int32_t utf32OffsetsBack[]={
1148            0,0,0,0,
1149            1,1,1,1,
1150            2,2,2,2,
1151            4,4,4,4,
1152            5,5,5,5,
1153            6,6,6,6,
1154            7,7,7,7,
1155            8,8,8,8
1156        };
1157
1158        testConvertToU(utf32, sizeof(utf32),
1159                       utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1160        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1161            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1162    }
1163
1164    /* Test UTF-32LE bad data handling*/
1165    {
1166        static const uint8_t utf32[]={
1167            0x61, 0x00, 0x00, 0x00,
1168            0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1169            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1170            0x62, 0x00, 0x00, 0x00,
1171            0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1172            0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1173            0x62, 0x01, 0x00, 0x00,
1174            0x62, 0x02, 0x00, 0x00,
1175        };
1176
1177        static const uint16_t utf32Expected[]={
1178            0x0061,
1179            0xfffd,         /* 0x110000 out of range */
1180            0xDBFF,         /* 0x10FFFF in range */
1181            0xDFFF,
1182            0x0062,
1183            0xfffd,         /* 0xffffffff out of range */
1184            0xfffd,         /* 0x7fffffff out of range */
1185            0x0162,
1186            0x0262
1187        };
1188        static const int32_t utf32Offsets[]={
1189            0, 4, 8, 8, 12, 16, 20, 24, 28
1190        };
1191        static const uint8_t utf32ExpectedBack[]={
1192            0x61, 0x00, 0x00, 0x00,
1193            0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1194            0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1195            0x62, 0x00, 0x00, 0x00,
1196            0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1197            0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1198            0x62, 0x01, 0x00, 0x00,
1199            0x62, 0x02, 0x00, 0x00
1200        };
1201        static const int32_t utf32OffsetsBack[]={
1202            0,0,0,0,
1203            1,1,1,1,
1204            2,2,2,2,
1205            4,4,4,4,
1206            5,5,5,5,
1207            6,6,6,6,
1208            7,7,7,7,
1209            8,8,8,8
1210        };
1211        testConvertToU(utf32, sizeof(utf32),
1212            utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1213        testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1214            utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1215    }
1216}
1217
1218static void TestCoverageMBCS(){
1219#if 0
1220    UErrorCode status = U_ZERO_ERROR;
1221    const char *directory = loadTestData(&status);
1222    char* tdpath = NULL;
1223    char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1224    int len = strlen(directory);
1225    char* index=NULL;
1226
1227    tdpath = (char*) malloc(sizeof(char) * (len * 2));
1228    uprv_strcpy(saveDirectory,u_getDataDirectory());
1229    log_verbose("Retrieved data directory %s \n",saveDirectory);
1230    uprv_strcpy(tdpath,directory);
1231    index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1232
1233    if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1234            *(index+1)=0;
1235    }
1236    u_setDataDirectory(tdpath);
1237    log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1238#endif
1239
1240    /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1241      which is test file for MBCS conversion with single-byte codepage data.*/
1242    {
1243
1244        /* MBCS with single byte codepage data test1.ucm*/
1245        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1246        const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1247        int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1248
1249        /*from Unicode*/
1250        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1251            expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1252    }
1253
1254    /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1255      which is test file for MBCS conversion with three-byte codepage data.*/
1256    {
1257
1258        /* MBCS with three byte codepage data test3.ucm*/
1259        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1260        const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1261        int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1262
1263        const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1264        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1265        int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1266
1267        /*from Unicode*/
1268        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1269            expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1270
1271        /*to Unicode*/
1272        testConvertToU(test3input, sizeof(test3input),
1273            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1274
1275    }
1276
1277    /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1278      which is test file for MBCS conversion with four-byte codepage data.*/
1279    {
1280
1281        /* MBCS with three byte codepage data test4.ucm*/
1282        static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1283        static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1284        static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1285
1286        static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1287        static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1288        static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1289
1290        /*from Unicode*/
1291        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1292            expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1293
1294        /*to Unicode*/
1295        testConvertToU(test4input, sizeof(test4input),
1296            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1297
1298    }
1299#if 0
1300    free(tdpath);
1301    /* restore the original data directory */
1302    log_verbose("Setting the data directory to %s \n", saveDirectory);
1303    u_setDataDirectory(saveDirectory);
1304    free(saveDirectory);
1305#endif
1306
1307}
1308
1309static void TestConverterType(const char *convName, UConverterType convType) {
1310    UConverter* myConverter;
1311    UErrorCode err = U_ZERO_ERROR;
1312
1313    myConverter = my_ucnv_open(convName, &err);
1314
1315    if (U_FAILURE(err)) {
1316        log_data_err("Failed to create an %s converter\n", convName);
1317        return;
1318    }
1319    else
1320    {
1321        if (ucnv_getType(myConverter)!=convType) {
1322            log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1323                convName, convType);
1324        }
1325        else {
1326            log_verbose("ucnv_getType %s ok\n", convName);
1327        }
1328    }
1329    ucnv_close(myConverter);
1330}
1331
1332static void TestConverterTypesAndStarters()
1333{
1334#if !UCONFIG_NO_LEGACY_CONVERSION
1335    UConverter* myConverter;
1336    UErrorCode err = U_ZERO_ERROR;
1337    UBool mystarters[256];
1338
1339/*    const UBool expectedKSCstarters[256] = {
1340        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1341        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1342        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1343        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1344        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1345        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1346        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1347        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1348        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1349        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1350        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1351        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1352        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1353        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1354        FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1355        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1356        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1357        TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1358        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1359        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1360        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1361        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1362        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1363        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1364        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1365        TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1366
1367
1368    log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1369
1370    myConverter = ucnv_open("ksc", &err);
1371    if (U_FAILURE(err)) {
1372      log_data_err("Failed to create an ibm-ksc converter\n");
1373      return;
1374    }
1375    else
1376    {
1377        if (ucnv_getType(myConverter)!=UCNV_MBCS)
1378            log_err("ucnv_getType Failed for ibm-949\n");
1379        else
1380            log_verbose("ucnv_getType ibm-949 ok\n");
1381
1382        if(myConverter!=NULL)
1383            ucnv_getStarters(myConverter, mystarters, &err);
1384
1385        /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1386          log_err("Failed ucnv_getStarters for ksc\n");
1387          else
1388          log_verbose("ucnv_getStarters ok\n");*/
1389
1390    }
1391    ucnv_close(myConverter);
1392
1393    TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1394    TestConverterType("ibm-878", UCNV_SBCS);
1395#endif
1396
1397    TestConverterType("iso-8859-1", UCNV_LATIN_1);
1398
1399    TestConverterType("ibm-1208", UCNV_UTF8);
1400
1401    TestConverterType("utf-8", UCNV_UTF8);
1402    TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1403    TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1404    TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1405    TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1406
1407#if !UCONFIG_NO_LEGACY_CONVERSION
1408
1409#if defined(U_ENABLE_GENERIC_ISO_2022)
1410    TestConverterType("iso-2022", UCNV_ISO_2022);
1411#endif
1412
1413    TestConverterType("hz", UCNV_HZ);
1414#endif
1415
1416    TestConverterType("scsu", UCNV_SCSU);
1417
1418#if !UCONFIG_NO_LEGACY_CONVERSION
1419    TestConverterType("x-iscii-de", UCNV_ISCII);
1420#endif
1421
1422    TestConverterType("ascii", UCNV_US_ASCII);
1423    TestConverterType("utf-7", UCNV_UTF7);
1424    TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1425    TestConverterType("bocu-1", UCNV_BOCU1);
1426}
1427
1428static void
1429TestAmbiguousConverter(UConverter *cnv) {
1430    static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1431    UChar outUnicode[20]={ 0, 0, 0, 0 };
1432
1433    const char *s;
1434    UChar *u;
1435    UErrorCode errorCode;
1436    UBool isAmbiguous;
1437
1438    /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1439    errorCode=U_ZERO_ERROR;
1440    s=inBytes;
1441    u=outUnicode;
1442    ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1443    if(U_FAILURE(errorCode)) {
1444        /* we do not care about general failures in this test; the input may just not be mappable */
1445        return;
1446    }
1447
1448    if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1449        /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1450        /* There are some encodings that are partially ASCII based,
1451        like the ISO-7 and GSM series of codepages, which we ignore. */
1452        return;
1453    }
1454
1455    isAmbiguous=ucnv_isAmbiguous(cnv);
1456
1457    /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1458    if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1459        log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1460            ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1461        return;
1462    }
1463
1464    if(outUnicode[2]!=0x5c) {
1465        /* needs fixup, fix it */
1466        ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1467        if(outUnicode[2]!=0x5c) {
1468            /* the fix failed */
1469            log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1470            return;
1471        }
1472    }
1473}
1474
1475static void TestAmbiguous()
1476{
1477    UErrorCode status = U_ZERO_ERROR;
1478    UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1479    static const char target[] = {
1480        /* "\\usr\\local\\share\\data\\icutest.txt" */
1481        0x5c, 0x75, 0x73, 0x72,
1482        0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1483        0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1484        0x5c, 0x64, 0x61, 0x74, 0x61,
1485        0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1486        0
1487    };
1488    UChar asciiResult[200], sjisResult[200];
1489    int32_t asciiLength = 0, sjisLength = 0, i;
1490    const char *name;
1491
1492    /* enumerate all converters */
1493    status=U_ZERO_ERROR;
1494    for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1495        cnv=ucnv_open(name, &status);
1496        if(U_SUCCESS(status)) {
1497            TestAmbiguousConverter(cnv);
1498            ucnv_close(cnv);
1499        } else {
1500            log_err("error: unable to open available converter \"%s\"\n", name);
1501            status=U_ZERO_ERROR;
1502        }
1503    }
1504
1505#if !UCONFIG_NO_LEGACY_CONVERSION
1506    sjis_cnv = ucnv_open("ibm-943", &status);
1507    if (U_FAILURE(status))
1508    {
1509        log_data_err("Failed to create a SJIS converter\n");
1510        return;
1511    }
1512    ascii_cnv = ucnv_open("LATIN-1", &status);
1513    if (U_FAILURE(status))
1514    {
1515        log_data_err("Failed to create a LATIN-1 converter\n");
1516        ucnv_close(sjis_cnv);
1517        return;
1518    }
1519    /* convert target from SJIS to Unicode */
1520    sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1521    if (U_FAILURE(status))
1522    {
1523        log_err("Failed to convert the SJIS string.\n");
1524        ucnv_close(sjis_cnv);
1525        ucnv_close(ascii_cnv);
1526        return;
1527    }
1528    /* convert target from Latin-1 to Unicode */
1529    asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1530    if (U_FAILURE(status))
1531    {
1532        log_err("Failed to convert the Latin-1 string.\n");
1533        ucnv_close(sjis_cnv);
1534        ucnv_close(ascii_cnv);
1535        return;
1536    }
1537    if (!ucnv_isAmbiguous(sjis_cnv))
1538    {
1539        log_err("SJIS converter should contain ambiguous character mappings.\n");
1540        ucnv_close(sjis_cnv);
1541        ucnv_close(ascii_cnv);
1542        return;
1543    }
1544    if (u_strcmp(sjisResult, asciiResult) == 0)
1545    {
1546        log_err("File separators for SJIS don't need to be fixed.\n");
1547    }
1548    ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1549    if (u_strcmp(sjisResult, asciiResult) != 0)
1550    {
1551        log_err("Fixing file separator for SJIS failed.\n");
1552    }
1553    ucnv_close(sjis_cnv);
1554    ucnv_close(ascii_cnv);
1555#endif
1556}
1557
1558static void
1559TestSignatureDetection(){
1560    /* with null terminated strings */
1561    {
1562        static const char* data[] = {
1563                "\xFE\xFF\x00\x00",     /* UTF-16BE */
1564                "\xFF\xFE\x00\x00",     /* UTF-16LE */
1565                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1566                "\x0E\xFE\xFF\x00",     /* SCSU     */
1567
1568                "\xFE\xFF",             /* UTF-16BE */
1569                "\xFF\xFE",             /* UTF-16LE */
1570                "\xEF\xBB\xBF",         /* UTF-8    */
1571                "\x0E\xFE\xFF",         /* SCSU     */
1572
1573                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1574                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1575                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1576                "\x0E\xFE\xFF\x41",     /* SCSU     */
1577
1578                "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1579                "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1580                "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1581                "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1582                "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1583
1584                "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1585        };
1586        static const char* expected[] = {
1587                "UTF-16BE",
1588                "UTF-16LE",
1589                "UTF-8",
1590                "SCSU",
1591
1592                "UTF-16BE",
1593                "UTF-16LE",
1594                "UTF-8",
1595                "SCSU",
1596
1597                "UTF-16BE",
1598                "UTF-16LE",
1599                "UTF-8",
1600                "SCSU",
1601
1602                "UTF-7",
1603                "UTF-7",
1604                "UTF-7",
1605                "UTF-7",
1606                "UTF-7",
1607                "UTF-EBCDIC"
1608        };
1609        static const int32_t expectedLength[] ={
1610            2,
1611            2,
1612            3,
1613            3,
1614
1615            2,
1616            2,
1617            3,
1618            3,
1619
1620            2,
1621            2,
1622            3,
1623            3,
1624
1625            5,
1626            4,
1627            4,
1628            4,
1629            4,
1630            4
1631        };
1632        int i=0;
1633        UErrorCode err;
1634        int32_t signatureLength = -1;
1635        const char* source = NULL;
1636        const char* enc = NULL;
1637        for( ; i<sizeof(data)/sizeof(char*); i++){
1638            err = U_ZERO_ERROR;
1639            source = data[i];
1640            enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1641            if(U_FAILURE(err)){
1642                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1643                continue;
1644            }
1645            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1646                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1647                continue;
1648            }
1649            if(signatureLength != expectedLength[i]){
1650                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1651            }
1652        }
1653    }
1654    {
1655        static const char* data[] = {
1656                "\xFE\xFF\x00",         /* UTF-16BE */
1657                "\xFF\xFE\x00",         /* UTF-16LE */
1658                "\xEF\xBB\xBF\x00",     /* UTF-8    */
1659                "\x0E\xFE\xFF\x00",     /* SCSU     */
1660                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1661                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1662                "\xFE\xFF",             /* UTF-16BE */
1663                "\xFF\xFE",             /* UTF-16LE */
1664                "\xEF\xBB\xBF",         /* UTF-8    */
1665                "\x0E\xFE\xFF",         /* SCSU     */
1666                "\x00\x00\xFE\xFF",     /* UTF-32BE */
1667                "\xFF\xFE\x00\x00",     /* UTF-32LE */
1668                "\xFE\xFF\x41\x42",     /* UTF-16BE */
1669                "\xFF\xFE\x41\x41",     /* UTF-16LE */
1670                "\xEF\xBB\xBF\x41",     /* UTF-8    */
1671                "\x0E\xFE\xFF\x41",     /* SCSU     */
1672                "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1673                "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1674                "\xFB\xEE\x28",         /* BOCU-1   */
1675                "\xFF\x41\x42"          /* NULL     */
1676        };
1677        static const int len[] = {
1678            3,
1679            3,
1680            4,
1681            4,
1682            4,
1683            4,
1684            2,
1685            2,
1686            3,
1687            3,
1688            4,
1689            4,
1690            4,
1691            4,
1692            4,
1693            4,
1694            5,
1695            5,
1696            3,
1697            3
1698        };
1699
1700        static const char* expected[] = {
1701                "UTF-16BE",
1702                "UTF-16LE",
1703                "UTF-8",
1704                "SCSU",
1705                "UTF-32BE",
1706                "UTF-32LE",
1707                "UTF-16BE",
1708                "UTF-16LE",
1709                "UTF-8",
1710                "SCSU",
1711                "UTF-32BE",
1712                "UTF-32LE",
1713                "UTF-16BE",
1714                "UTF-16LE",
1715                "UTF-8",
1716                "SCSU",
1717                "UTF-32BE",
1718                "UTF-32LE",
1719                "BOCU-1",
1720                NULL
1721        };
1722        static const int32_t expectedLength[] ={
1723            2,
1724            2,
1725            3,
1726            3,
1727            4,
1728            4,
1729            2,
1730            2,
1731            3,
1732            3,
1733            4,
1734            4,
1735            2,
1736            2,
1737            3,
1738            3,
1739            4,
1740            4,
1741            3,
1742            0
1743        };
1744        int i=0;
1745        UErrorCode err;
1746        int32_t signatureLength = -1;
1747        int32_t sourceLength=-1;
1748        const char* source = NULL;
1749        const char* enc = NULL;
1750        for( ; i<sizeof(data)/sizeof(char*); i++){
1751            err = U_ZERO_ERROR;
1752            source = data[i];
1753            sourceLength = len[i];
1754            enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1755            if(U_FAILURE(err)){
1756                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1757                continue;
1758            }
1759            if(enc == NULL || strcmp(enc,expected[i]) !=0){
1760                if(expected[i] !=NULL){
1761                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1762                 continue;
1763                }
1764            }
1765            if(signatureLength != expectedLength[i]){
1766                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1767            }
1768        }
1769    }
1770}
1771
1772void
1773static TestUTF7() {
1774    /* test input */
1775    static const uint8_t in[]={
1776        /* H - +Jjo- - ! +- +2AHcAQ */
1777        0x48,
1778        0x2d,
1779        0x2b, 0x4a, 0x6a, 0x6f,
1780        0x2d, 0x2d,
1781        0x21,
1782        0x2b, 0x2d,
1783        0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1784    };
1785
1786    /* expected test results */
1787    static const int32_t results[]={
1788        /* number of bytes read, code point */
1789        1, 0x48,
1790        1, 0x2d,
1791        4, 0x263a, /* <WHITE SMILING FACE> */
1792        2, 0x2d,
1793        1, 0x21,
1794        2, 0x2b,
1795        7, 0x10401
1796    };
1797
1798    const char *cnvName;
1799    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1800    UErrorCode errorCode=U_ZERO_ERROR;
1801    UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1802    if(U_FAILURE(errorCode)) {
1803        log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1804        return;
1805    }
1806    TestNextUChar(cnv, source, limit, results, "UTF-7");
1807    /* Test the condition when source >= sourceLimit */
1808    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1809    cnvName = ucnv_getName(cnv, &errorCode);
1810    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1811        log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1812    }
1813    ucnv_close(cnv);
1814}
1815
1816void
1817static TestIMAP() {
1818    /* test input */
1819    static const uint8_t in[]={
1820        /* H - &Jjo- - ! &- &2AHcAQ- \ */
1821        0x48,
1822        0x2d,
1823        0x26, 0x4a, 0x6a, 0x6f,
1824        0x2d, 0x2d,
1825        0x21,
1826        0x26, 0x2d,
1827        0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1828    };
1829
1830    /* expected test results */
1831    static const int32_t results[]={
1832        /* number of bytes read, code point */
1833        1, 0x48,
1834        1, 0x2d,
1835        4, 0x263a, /* <WHITE SMILING FACE> */
1836        2, 0x2d,
1837        1, 0x21,
1838        2, 0x26,
1839        7, 0x10401
1840    };
1841
1842    const char *cnvName;
1843    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1844    UErrorCode errorCode=U_ZERO_ERROR;
1845    UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1846    if(U_FAILURE(errorCode)) {
1847        log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1848        return;
1849    }
1850    TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1851    /* Test the condition when source >= sourceLimit */
1852    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1853    cnvName = ucnv_getName(cnv, &errorCode);
1854    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1855        log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1856    }
1857    ucnv_close(cnv);
1858}
1859
1860void
1861static TestUTF8() {
1862    /* test input */
1863    static const uint8_t in[]={
1864        0x61,
1865        0xc2, 0x80,
1866        0xe0, 0xa0, 0x80,
1867        0xf0, 0x90, 0x80, 0x80,
1868        0xf4, 0x84, 0x8c, 0xa1,
1869        0xf0, 0x90, 0x90, 0x81
1870    };
1871
1872    /* expected test results */
1873    static const int32_t results[]={
1874        /* number of bytes read, code point */
1875        1, 0x61,
1876        2, 0x80,
1877        3, 0x800,
1878        4, 0x10000,
1879        4, 0x104321,
1880        4, 0x10401
1881    };
1882
1883    /* error test input */
1884    static const uint8_t in2[]={
1885        0x61,
1886        0xc0, 0x80,                     /* illegal non-shortest form */
1887        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1888        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1889        0xc0, 0xc0,                     /* illegal trail byte */
1890        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1891        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1892        0xfe,                           /* illegal byte altogether */
1893        0x62
1894    };
1895
1896    /* expected error test results */
1897    static const int32_t results2[]={
1898        /* number of bytes read, code point */
1899        1, 0x61,
1900        22, 0x62
1901    };
1902
1903    UConverterToUCallback cb;
1904    const void *p;
1905
1906    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1907    UErrorCode errorCode=U_ZERO_ERROR;
1908    UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1909    if(U_FAILURE(errorCode)) {
1910        log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1911        return;
1912    }
1913    TestNextUChar(cnv, source, limit, results, "UTF-8");
1914    /* Test the condition when source >= sourceLimit */
1915    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1916
1917    /* test error behavior with a skip callback */
1918    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1919    source=(const char *)in2;
1920    limit=(const char *)(in2+sizeof(in2));
1921    TestNextUChar(cnv, source, limit, results2, "UTF-8");
1922
1923    ucnv_close(cnv);
1924}
1925
1926void
1927static TestCESU8() {
1928    /* test input */
1929    static const uint8_t in[]={
1930        0x61,
1931        0xc2, 0x80,
1932        0xe0, 0xa0, 0x80,
1933        0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1934        0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1935        0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1936        0xef, 0xbf, 0xbc
1937    };
1938
1939    /* expected test results */
1940    static const int32_t results[]={
1941        /* number of bytes read, code point */
1942        1, 0x61,
1943        2, 0x80,
1944        3, 0x800,
1945        6, 0x10000,
1946        3, 0xdc01,
1947        -1,0xd802,  /* may read 3 or 6 bytes */
1948        -1,0x10ffff,/* may read 0 or 3 bytes */
1949        3, 0xfffc
1950    };
1951
1952    /* error test input */
1953    static const uint8_t in2[]={
1954        0x61,
1955        0xc0, 0x80,                     /* illegal non-shortest form */
1956        0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1957        0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1958        0xc0, 0xc0,                     /* illegal trail byte */
1959        0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
1960        0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
1961        0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
1962        0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1963        0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1964        0xfe,                           /* illegal byte altogether */
1965        0x62
1966    };
1967
1968    /* expected error test results */
1969    static const int32_t results2[]={
1970        /* number of bytes read, code point */
1971        1, 0x61,
1972        34, 0x62
1973    };
1974
1975    UConverterToUCallback cb;
1976    const void *p;
1977
1978    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1979    UErrorCode errorCode=U_ZERO_ERROR;
1980    UConverter *cnv=ucnv_open("CESU-8", &errorCode);
1981    if(U_FAILURE(errorCode)) {
1982        log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
1983        return;
1984    }
1985    TestNextUChar(cnv, source, limit, results, "CESU-8");
1986    /* Test the condition when source >= sourceLimit */
1987    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1988
1989    /* test error behavior with a skip callback */
1990    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1991    source=(const char *)in2;
1992    limit=(const char *)(in2+sizeof(in2));
1993    TestNextUChar(cnv, source, limit, results2, "CESU-8");
1994
1995    ucnv_close(cnv);
1996}
1997
1998void
1999static TestUTF16() {
2000    /* test input */
2001    static const uint8_t in1[]={
2002        0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2003    };
2004    static const uint8_t in2[]={
2005        0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2006    };
2007    static const uint8_t in3[]={
2008        0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2009    };
2010
2011    /* expected test results */
2012    static const int32_t results1[]={
2013        /* number of bytes read, code point */
2014        4, 0x4e00,
2015        2, 0xfeff
2016    };
2017    static const int32_t results2[]={
2018        /* number of bytes read, code point */
2019        4, 0x004e,
2020        2, 0xfffe
2021    };
2022    static const int32_t results3[]={
2023        /* number of bytes read, code point */
2024        2, 0xfefe,
2025        2, 0x4e00,
2026        2, 0xfeff,
2027        4, 0x20001
2028    };
2029
2030    const char *source, *limit;
2031
2032    UErrorCode errorCode=U_ZERO_ERROR;
2033    UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2034    if(U_FAILURE(errorCode)) {
2035        log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2036        return;
2037    }
2038
2039    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2040    TestNextUChar(cnv, source, limit, results1, "UTF-16");
2041
2042    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2043    ucnv_resetToUnicode(cnv);
2044    TestNextUChar(cnv, source, limit, results2, "UTF-16");
2045
2046    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2047    ucnv_resetToUnicode(cnv);
2048    TestNextUChar(cnv, source, limit, results3, "UTF-16");
2049
2050    /* Test the condition when source >= sourceLimit */
2051    ucnv_resetToUnicode(cnv);
2052    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2053
2054    ucnv_close(cnv);
2055}
2056
2057void
2058static TestUTF16BE() {
2059    /* test input */
2060    static const uint8_t in[]={
2061        0x00, 0x61,
2062        0x00, 0xc0,
2063        0x00, 0x31,
2064        0x00, 0xf4,
2065        0xce, 0xfe,
2066        0xd8, 0x01, 0xdc, 0x01
2067    };
2068
2069    /* expected test results */
2070    static const int32_t results[]={
2071        /* number of bytes read, code point */
2072        2, 0x61,
2073        2, 0xc0,
2074        2, 0x31,
2075        2, 0xf4,
2076        2, 0xcefe,
2077        4, 0x10401
2078    };
2079
2080    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2081    UErrorCode errorCode=U_ZERO_ERROR;
2082    UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2083    if(U_FAILURE(errorCode)) {
2084        log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2085        return;
2086    }
2087    TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2088    /* Test the condition when source >= sourceLimit */
2089    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2090    /*Test for the condition where there is an invalid character*/
2091    {
2092        static const uint8_t source2[]={0x61};
2093        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2094        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2095    }
2096#if 0
2097    /*
2098     * Test disabled because currently the UTF-16BE/LE converters are supposed
2099     * to not set errors for unpaired surrogates.
2100     * This may change with
2101     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2102     */
2103
2104    /*Test for the condition where there is a surrogate pair*/
2105    {
2106        const uint8_t source2[]={0xd8, 0x01};
2107        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2108    }
2109#endif
2110    ucnv_close(cnv);
2111}
2112
2113static void
2114TestUTF16LE() {
2115    /* test input */
2116    static const uint8_t in[]={
2117        0x61, 0x00,
2118        0x31, 0x00,
2119        0x4e, 0x2e,
2120        0x4e, 0x00,
2121        0x01, 0xd8, 0x01, 0xdc
2122    };
2123
2124    /* expected test results */
2125    static const int32_t results[]={
2126        /* number of bytes read, code point */
2127        2, 0x61,
2128        2, 0x31,
2129        2, 0x2e4e,
2130        2, 0x4e,
2131        4, 0x10401
2132    };
2133
2134    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2135    UErrorCode errorCode=U_ZERO_ERROR;
2136    UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2137    if(U_FAILURE(errorCode)) {
2138        log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2139        return;
2140    }
2141    TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2142    /* Test the condition when source >= sourceLimit */
2143    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2144    /*Test for the condition where there is an invalid character*/
2145    {
2146        static const uint8_t source2[]={0x61};
2147        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2148        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2149    }
2150#if 0
2151    /*
2152     * Test disabled because currently the UTF-16BE/LE converters are supposed
2153     * to not set errors for unpaired surrogates.
2154     * This may change with
2155     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2156     */
2157
2158    /*Test for the condition where there is a surrogate character*/
2159    {
2160        static const uint8_t source2[]={0x01, 0xd8};
2161        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2162    }
2163#endif
2164
2165    ucnv_close(cnv);
2166}
2167
2168void
2169static TestUTF32() {
2170    /* test input */
2171    static const uint8_t in1[]={
2172        0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2173    };
2174    static const uint8_t in2[]={
2175        0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2176    };
2177    static const uint8_t in3[]={
2178        0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2179    };
2180
2181    /* expected test results */
2182    static const int32_t results1[]={
2183        /* number of bytes read, code point */
2184        8, 0x100f00,
2185        4, 0xfeff
2186    };
2187    static const int32_t results2[]={
2188        /* number of bytes read, code point */
2189        8, 0x0f1000,
2190        4, 0xfffe
2191    };
2192    static const int32_t results3[]={
2193        /* number of bytes read, code point */
2194        4, 0xfefe,
2195        4, 0x100f00,
2196        4, 0xfffd, /* unmatched surrogate */
2197        4, 0xfffd  /* unmatched surrogate */
2198    };
2199
2200    const char *source, *limit;
2201
2202    UErrorCode errorCode=U_ZERO_ERROR;
2203    UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2204    if(U_FAILURE(errorCode)) {
2205        log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2206        return;
2207    }
2208
2209    source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2210    TestNextUChar(cnv, source, limit, results1, "UTF-32");
2211
2212    source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2213    ucnv_resetToUnicode(cnv);
2214    TestNextUChar(cnv, source, limit, results2, "UTF-32");
2215
2216    source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2217    ucnv_resetToUnicode(cnv);
2218    TestNextUChar(cnv, source, limit, results3, "UTF-32");
2219
2220    /* Test the condition when source >= sourceLimit */
2221    ucnv_resetToUnicode(cnv);
2222    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2223
2224    ucnv_close(cnv);
2225}
2226
2227static void
2228TestUTF32BE() {
2229    /* test input */
2230    static const uint8_t in[]={
2231        0x00, 0x00, 0x00, 0x61,
2232        0x00, 0x00, 0x30, 0x61,
2233        0x00, 0x00, 0xdc, 0x00,
2234        0x00, 0x00, 0xd8, 0x00,
2235        0x00, 0x00, 0xdf, 0xff,
2236        0x00, 0x00, 0xff, 0xfe,
2237        0x00, 0x10, 0xab, 0xcd,
2238        0x00, 0x10, 0xff, 0xff
2239    };
2240
2241    /* expected test results */
2242    static const int32_t results[]={
2243        /* number of bytes read, code point */
2244        4, 0x61,
2245        4, 0x3061,
2246        4, 0xfffd,
2247        4, 0xfffd,
2248        4, 0xfffd,
2249        4, 0xfffe,
2250        4, 0x10abcd,
2251        4, 0x10ffff
2252    };
2253
2254    /* error test input */
2255    static const uint8_t in2[]={
2256        0x00, 0x00, 0x00, 0x61,
2257        0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2258        0x00, 0x00, 0x00, 0x62,
2259        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2260        0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2261        0x00, 0x00, 0x01, 0x62,
2262        0x00, 0x00, 0x02, 0x62
2263    };
2264
2265    /* expected error test results */
2266    static const int32_t results2[]={
2267        /* number of bytes read, code point */
2268        4,  0x61,
2269        8,  0x62,
2270        12, 0x162,
2271        4,  0x262
2272    };
2273
2274    UConverterToUCallback cb;
2275    const void *p;
2276
2277    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2278    UErrorCode errorCode=U_ZERO_ERROR;
2279    UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2280    if(U_FAILURE(errorCode)) {
2281        log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2282        return;
2283    }
2284    TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2285
2286    /* Test the condition when source >= sourceLimit */
2287    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2288
2289    /* test error behavior with a skip callback */
2290    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2291    source=(const char *)in2;
2292    limit=(const char *)(in2+sizeof(in2));
2293    TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2294
2295    ucnv_close(cnv);
2296}
2297
2298static void
2299TestUTF32LE() {
2300    /* test input */
2301    static const uint8_t in[]={
2302        0x61, 0x00, 0x00, 0x00,
2303        0x61, 0x30, 0x00, 0x00,
2304        0x00, 0xdc, 0x00, 0x00,
2305        0x00, 0xd8, 0x00, 0x00,
2306        0xff, 0xdf, 0x00, 0x00,
2307        0xfe, 0xff, 0x00, 0x00,
2308        0xcd, 0xab, 0x10, 0x00,
2309        0xff, 0xff, 0x10, 0x00
2310    };
2311
2312    /* expected test results */
2313    static const int32_t results[]={
2314        /* number of bytes read, code point */
2315        4, 0x61,
2316        4, 0x3061,
2317        4, 0xfffd,
2318        4, 0xfffd,
2319        4, 0xfffd,
2320        4, 0xfffe,
2321        4, 0x10abcd,
2322        4, 0x10ffff
2323    };
2324
2325    /* error test input */
2326    static const uint8_t in2[]={
2327        0x61, 0x00, 0x00, 0x00,
2328        0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2329        0x62, 0x00, 0x00, 0x00,
2330        0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2331        0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2332        0x62, 0x01, 0x00, 0x00,
2333        0x62, 0x02, 0x00, 0x00,
2334    };
2335
2336    /* expected error test results */
2337    static const int32_t results2[]={
2338        /* number of bytes read, code point */
2339        4,  0x61,
2340        8,  0x62,
2341        12, 0x162,
2342        4,  0x262,
2343    };
2344
2345    UConverterToUCallback cb;
2346    const void *p;
2347
2348    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2349    UErrorCode errorCode=U_ZERO_ERROR;
2350    UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2351    if(U_FAILURE(errorCode)) {
2352        log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2353        return;
2354    }
2355    TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2356
2357    /* Test the condition when source >= sourceLimit */
2358    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2359
2360    /* test error behavior with a skip callback */
2361    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2362    source=(const char *)in2;
2363    limit=(const char *)(in2+sizeof(in2));
2364    TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2365
2366    ucnv_close(cnv);
2367}
2368
2369static void
2370TestLATIN1() {
2371    /* test input */
2372    static const uint8_t in[]={
2373       0x61,
2374       0x31,
2375       0x32,
2376       0xc0,
2377       0xf0,
2378       0xf4,
2379    };
2380
2381    /* expected test results */
2382    static const int32_t results[]={
2383        /* number of bytes read, code point */
2384        1, 0x61,
2385        1, 0x31,
2386        1, 0x32,
2387        1, 0xc0,
2388        1, 0xf0,
2389        1, 0xf4,
2390    };
2391    static const uint16_t in1[] = {
2392        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2393        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2394        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2395        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2396        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2397        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2398        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2399        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2400        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2401        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2402        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2403        0xcb, 0x82
2404    };
2405    static const uint8_t out1[] = {
2406        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2407        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2408        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2409        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2410        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2411        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2412        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2413        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2414        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2415        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2416        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2417        0xcb, 0x82
2418    };
2419    static const uint16_t in2[]={
2420        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2421        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2422        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2423        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2424        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2425        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2426        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2427        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2428        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2429        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2430        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2431        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2432        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2433        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2434        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2435        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2436        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2437        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2438        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2439        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2440        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2441        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2442        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2443        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2444        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2445        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2446        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2447        0x37, 0x20, 0x2A, 0x2F,
2448    };
2449    static const unsigned char out2[]={
2450        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2451        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2452        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2453        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2454        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2455        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2456        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2457        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2458        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2459        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2460        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2461        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2462        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2463        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2464        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2465        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2466        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2467        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2468        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2469        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2470        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2471        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2472        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2473        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2474        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2475        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2476        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2477        0x37, 0x20, 0x2A, 0x2F,
2478    };
2479    const char *source=(const char *)in;
2480    const char *limit=(const char *)in+sizeof(in);
2481
2482    UErrorCode errorCode=U_ZERO_ERROR;
2483    UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2484    if(U_FAILURE(errorCode)) {
2485        log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2486        return;
2487    }
2488    TestNextUChar(cnv, source, limit, results, "LATIN_1");
2489    /* Test the condition when source >= sourceLimit */
2490    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2491    TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2492    TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2493
2494    ucnv_close(cnv);
2495}
2496
2497static void
2498TestSBCS() {
2499    /* test input */
2500    static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2501    /* expected test results */
2502    static const int32_t results[]={
2503        /* number of bytes read, code point */
2504        1, 0x61,
2505        1, 0xbf,
2506        1, 0xc4,
2507        1, 0x2021,
2508        1, 0xf8ff,
2509        1, 0x00d9
2510    };
2511
2512    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2513    UErrorCode errorCode=U_ZERO_ERROR;
2514    UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2515    if(U_FAILURE(errorCode)) {
2516        log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2517        return;
2518    }
2519    TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2520    /* Test the condition when source >= sourceLimit */
2521    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2522    /*Test for Illegal character */ /*
2523    {
2524    static const uint8_t input1[]={ 0xA1 };
2525    const char* illegalsource=(const char*)input1;
2526    TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2527    }
2528   */
2529    ucnv_close(cnv);
2530}
2531
2532static void
2533TestDBCS() {
2534    /* test input */
2535    static const uint8_t in[]={
2536        0x44, 0x6a,
2537        0xc4, 0x9c,
2538        0x7a, 0x74,
2539        0x46, 0xab,
2540        0x42, 0x5b,
2541
2542    };
2543
2544    /* expected test results */
2545    static const int32_t results[]={
2546        /* number of bytes read, code point */
2547        2, 0x00a7,
2548        2, 0xe1d2,
2549        2, 0x6962,
2550        2, 0xf842,
2551        2, 0xffe5,
2552    };
2553
2554    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2555    UErrorCode errorCode=U_ZERO_ERROR;
2556
2557    UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2558    if(U_FAILURE(errorCode)) {
2559        log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2560        return;
2561    }
2562    TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2563    /* Test the condition when source >= sourceLimit */
2564    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2565    /*Test for the condition where there is an invalid character*/
2566    {
2567        static const uint8_t source2[]={0x1a, 0x1b};
2568        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2569    }
2570    /*Test for the condition where we have a truncated char*/
2571    {
2572        static const uint8_t source1[]={0xc4};
2573        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2574        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2575    }
2576    ucnv_close(cnv);
2577}
2578
2579static void
2580TestMBCS() {
2581    /* test input */
2582    static const uint8_t in[]={
2583        0x01,
2584        0xa6, 0xa3,
2585        0x00,
2586        0xa6, 0xa1,
2587        0x08,
2588        0xc2, 0x76,
2589        0xc2, 0x78,
2590
2591    };
2592
2593    /* expected test results */
2594    static const int32_t results[]={
2595        /* number of bytes read, code point */
2596        1, 0x0001,
2597        2, 0x250c,
2598        1, 0x0000,
2599        2, 0x2500,
2600        1, 0x0008,
2601        2, 0xd60c,
2602        2, 0xd60e,
2603    };
2604
2605    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2606    UErrorCode errorCode=U_ZERO_ERROR;
2607
2608    UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2609    if(U_FAILURE(errorCode)) {
2610        log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2611        return;
2612    }
2613    TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2614    /* Test the condition when source >= sourceLimit */
2615    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2616    /*Test for the condition where there is an invalid character*/
2617    {
2618        static const uint8_t source2[]={0xa1, 0x80};
2619        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2620    }
2621    /*Test for the condition where we have a truncated char*/
2622    {
2623        static const uint8_t source1[]={0xc4};
2624        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2625        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2626    }
2627    ucnv_close(cnv);
2628
2629}
2630
2631#ifdef U_ENABLE_GENERIC_ISO_2022
2632
2633static void
2634TestISO_2022() {
2635    /* test input */
2636    static const uint8_t in[]={
2637        0x1b, 0x25, 0x42,
2638        0x31,
2639        0x32,
2640        0x61,
2641        0xc2, 0x80,
2642        0xe0, 0xa0, 0x80,
2643        0xf0, 0x90, 0x80, 0x80
2644    };
2645
2646
2647
2648    /* expected test results */
2649    static const int32_t results[]={
2650        /* number of bytes read, code point */
2651        4, 0x0031,  /* 4 bytes including the escape sequence */
2652        1, 0x0032,
2653        1, 0x61,
2654        2, 0x80,
2655        3, 0x800,
2656        4, 0x10000
2657    };
2658
2659    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2660    UErrorCode errorCode=U_ZERO_ERROR;
2661    UConverter *cnv;
2662
2663    cnv=ucnv_open("ISO_2022", &errorCode);
2664    if(U_FAILURE(errorCode)) {
2665        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2666        return;
2667    }
2668    TestNextUChar(cnv, source, limit, results, "ISO_2022");
2669
2670    /* Test the condition when source >= sourceLimit */
2671    TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2672    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2673    /*Test for the condition where we have a truncated char*/
2674    {
2675        static const uint8_t source1[]={0xc4};
2676        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2677        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2678    }
2679    /*Test for the condition where there is an invalid character*/
2680    {
2681        static const uint8_t source2[]={0xa1, 0x01};
2682        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2683    }
2684    ucnv_close(cnv);
2685}
2686
2687#endif
2688
2689static void
2690TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2691    const UChar* uSource;
2692    const UChar* uSourceLimit;
2693    const char* cSource;
2694    const char* cSourceLimit;
2695    UChar *uTargetLimit =NULL;
2696    UChar *uTarget;
2697    char *cTarget;
2698    const char *cTargetLimit;
2699    char *cBuf;
2700    UChar *uBuf,*test;
2701    int32_t uBufSize = 120;
2702    int len=0;
2703    int i=2;
2704    UErrorCode errorCode=U_ZERO_ERROR;
2705    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2706    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2707    ucnv_reset(cnv);
2708    for(;--i>0; ){
2709        uSource = (UChar*) source;
2710        uSourceLimit=(const UChar*)sourceLimit;
2711        cTarget = cBuf;
2712        uTarget = uBuf;
2713        cSource = cBuf;
2714        cTargetLimit = cBuf;
2715        uTargetLimit = uBuf;
2716
2717        do{
2718
2719            cTargetLimit = cTargetLimit+ i;
2720            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2721            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2722               errorCode=U_ZERO_ERROR;
2723                continue;
2724            }
2725
2726            if(U_FAILURE(errorCode)){
2727                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2728                return;
2729            }
2730
2731        }while (uSource<uSourceLimit);
2732
2733        cSourceLimit =cTarget;
2734        do{
2735            uTargetLimit=uTargetLimit+i;
2736            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2737            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2738               errorCode=U_ZERO_ERROR;
2739                continue;
2740            }
2741            if(U_FAILURE(errorCode)){
2742                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2743                    return;
2744            }
2745        }while(cSource<cSourceLimit);
2746
2747        uSource = source;
2748        test =uBuf;
2749        for(len=0;len<(int)(source - sourceLimit);len++){
2750            if(uBuf[len]!=uSource[len]){
2751                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2752            }
2753        }
2754    }
2755    free(uBuf);
2756    free(cBuf);
2757}
2758/* Test for Jitterbug 778 */
2759static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2760    const UChar* uSource;
2761    const UChar* uSourceLimit;
2762    const char* cSource;
2763    UChar *uTargetLimit =NULL;
2764    UChar *uTarget;
2765    char *cTarget;
2766    const char *cTargetLimit;
2767    char *cBuf;
2768    UChar *uBuf,*test;
2769    int32_t uBufSize = 120;
2770    int numCharsInTarget=0;
2771    UErrorCode errorCode=U_ZERO_ERROR;
2772    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2773    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2774    uSource = source;
2775    uSourceLimit=sourceLimit;
2776    cTarget = cBuf;
2777    cTargetLimit = cBuf +uBufSize*5;
2778    uTarget = uBuf;
2779    uTargetLimit = uBuf+ uBufSize*5;
2780    ucnv_reset(cnv);
2781    numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2782    if(U_FAILURE(errorCode)){
2783        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2784        return;
2785    }
2786    cSource = cBuf;
2787    test =uBuf;
2788    ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2789    if(U_FAILURE(errorCode)){
2790        log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2791        return;
2792    }
2793    uSource = source;
2794    while(uSource<uSourceLimit){
2795        if(*test!=*uSource){
2796
2797            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2798        }
2799        uSource++;
2800        test++;
2801    }
2802    free(uBuf);
2803    free(cBuf);
2804}
2805
2806static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2807    const UChar* uSource;
2808    const UChar* uSourceLimit;
2809    const char* cSource;
2810    const char* cSourceLimit;
2811    UChar *uTargetLimit =NULL;
2812    UChar *uTarget;
2813    char *cTarget;
2814    const char *cTargetLimit;
2815    char *cBuf;
2816    UChar *uBuf,*test;
2817    int32_t uBufSize = 120;
2818    int len=0;
2819    int i=2;
2820    const UChar *temp = sourceLimit;
2821    UErrorCode errorCode=U_ZERO_ERROR;
2822    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2823    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2824
2825    ucnv_reset(cnv);
2826    for(;--i>0;){
2827        uSource = (UChar*) source;
2828        cTarget = cBuf;
2829        uTarget = uBuf;
2830        cSource = cBuf;
2831        cTargetLimit = cBuf;
2832        uTargetLimit = uBuf+uBufSize*5;
2833        cTargetLimit = cTargetLimit+uBufSize*10;
2834        uSourceLimit=uSource;
2835        do{
2836
2837            if (uSourceLimit < sourceLimit) {
2838                uSourceLimit = uSourceLimit+1;
2839            }
2840            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2841            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2842               errorCode=U_ZERO_ERROR;
2843                continue;
2844            }
2845
2846            if(U_FAILURE(errorCode)){
2847                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2848                return;
2849            }
2850
2851        }while (uSource<temp);
2852
2853        cSourceLimit =cBuf;
2854        do{
2855            if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2856                cSourceLimit = cSourceLimit+1;
2857            }
2858            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2859            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2860               errorCode=U_ZERO_ERROR;
2861                continue;
2862            }
2863            if(U_FAILURE(errorCode)){
2864                   log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2865                    return;
2866            }
2867        }while(cSource<cTarget);
2868
2869        uSource = source;
2870        test =uBuf;
2871        for(;len<(int)(source - sourceLimit);len++){
2872            if(uBuf[len]!=uSource[len]){
2873                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2874            }
2875        }
2876    }
2877    free(uBuf);
2878    free(cBuf);
2879}
2880static void
2881TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2882                     const uint16_t results[], const char* message){
2883     const char* s0;
2884     const char* s=(char*)source;
2885     const uint16_t *r=results;
2886     UErrorCode errorCode=U_ZERO_ERROR;
2887     uint32_t c,exC;
2888     ucnv_reset(cnv);
2889     while(s<limit) {
2890        s0=s;
2891        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2892        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2893            break; /* no more significant input */
2894        } else if(U_FAILURE(errorCode)) {
2895            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2896            break;
2897        } else {
2898            if(UTF_IS_FIRST_SURROGATE(*r)){
2899                int i =0, len = 2;
2900                UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
2901                r++;
2902            }else{
2903                exC = *r;
2904            }
2905            if(c!=(uint32_t)(exC))
2906                log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
2907        }
2908        r++;
2909    }
2910}
2911
2912static int TestJitterbug930(const char* enc){
2913    UErrorCode err = U_ZERO_ERROR;
2914    UConverter*converter;
2915    char out[80];
2916    char*target = out;
2917    UChar in[4];
2918    const UChar*source = in;
2919    int32_t off[80];
2920    int32_t* offsets = off;
2921    int numOffWritten=0;
2922    UBool flush = 0;
2923    converter = my_ucnv_open(enc, &err);
2924
2925    in[0] = 0x41;     /* 0x4E00;*/
2926    in[1] = 0x4E01;
2927    in[2] = 0x4E02;
2928    in[3] = 0x4E03;
2929
2930    memset(off, '*', sizeof(off));
2931
2932    ucnv_fromUnicode (converter,
2933            &target,
2934            target+2,
2935            &source,
2936            source+3,
2937            offsets,
2938            flush,
2939            &err);
2940
2941        /* writes three bytes into the output buffer: 41 1B 24
2942        * but offsets contains 0 1 1
2943    */
2944    while(*offsets< off[10]){
2945        numOffWritten++;
2946        offsets++;
2947    }
2948    log_verbose("Testing Jitterbug 930 for encoding %s",enc);
2949    if(numOffWritten!= (int)(target-out)){
2950        log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
2951    }
2952
2953    err = U_ZERO_ERROR;
2954
2955    memset(off,'*' , sizeof(off));
2956
2957    flush = 1;
2958    offsets=off;
2959    ucnv_fromUnicode (converter,
2960            &target,
2961            target+4,
2962            &source,
2963            source,
2964            offsets,
2965            flush,
2966            &err);
2967    numOffWritten=0;
2968    while(*offsets< off[10]){
2969        numOffWritten++;
2970        if(*offsets!= -1){
2971            log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
2972        }
2973        offsets++;
2974    }
2975
2976    /* writes 42 43 7A into output buffer,
2977     * offsets contains -1 -1 -1
2978     */
2979    ucnv_close(converter);
2980    return 0;
2981}
2982
2983static void
2984TestHZ() {
2985    /* test input */
2986    static const uint16_t in[]={
2987            0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
2988            0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
2989            0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
2990            0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
2991            0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
2992            0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
2993            0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
2994            0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
2995            0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
2996            0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
2997            0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
2998            0x005A, 0x005B, 0x005C, 0x000A
2999      };
3000    const UChar* uSource;
3001    const UChar* uSourceLimit;
3002    const char* cSource;
3003    const char* cSourceLimit;
3004    UChar *uTargetLimit =NULL;
3005    UChar *uTarget;
3006    char *cTarget;
3007    const char *cTargetLimit;
3008    char *cBuf;
3009    UChar *uBuf,*test;
3010    int32_t uBufSize = 120;
3011    UErrorCode errorCode=U_ZERO_ERROR;
3012    UConverter *cnv;
3013    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3014    int32_t* myOff= offsets;
3015    cnv=ucnv_open("HZ", &errorCode);
3016    if(U_FAILURE(errorCode)) {
3017        log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3018        return;
3019    }
3020
3021    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3022    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3023    uSource = (const UChar*)in;
3024    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3025    cTarget = cBuf;
3026    cTargetLimit = cBuf +uBufSize*5;
3027    uTarget = uBuf;
3028    uTargetLimit = uBuf+ uBufSize*5;
3029    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3030    if(U_FAILURE(errorCode)){
3031        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3032        return;
3033    }
3034    cSource = cBuf;
3035    cSourceLimit =cTarget;
3036    test =uBuf;
3037    myOff=offsets;
3038    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3039    if(U_FAILURE(errorCode)){
3040        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3041        return;
3042    }
3043    uSource = (const UChar*)in;
3044    while(uSource<uSourceLimit){
3045        if(*test!=*uSource){
3046
3047            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3048        }
3049        uSource++;
3050        test++;
3051    }
3052    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3053    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3054    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3055    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3056    TestJitterbug930("csISO2022JP");
3057    ucnv_close(cnv);
3058    free(offsets);
3059    free(uBuf);
3060    free(cBuf);
3061}
3062
3063static void
3064TestISCII(){
3065        /* test input */
3066    static const uint16_t in[]={
3067        /* test full range of Devanagari */
3068        0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3069        0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3070        0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3071        0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3072        0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3073        0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3074        0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3075        0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3076        0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3077        0x096D,0x096E,0x096F,
3078        /* test Soft halant*/
3079        0x0915,0x094d, 0x200D,
3080        /* test explicit halant */
3081        0x0915,0x094d, 0x200c,
3082        /* test double danda */
3083        0x965,
3084        /* test ASCII */
3085        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3086        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3087        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3088        /* tests from Lotus */
3089        0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3090        0x0930,0x094D,0x200D,
3091        0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3092        0x0915,0x0921,0x002B,0x095F,
3093        /* tamil range */
3094        0x0B86, 0xB87, 0xB88,
3095        /* telugu range */
3096        0x0C05, 0x0C02, 0x0C03,0x0c31,
3097        /* kannada range */
3098        0x0C85, 0xC82, 0x0C83,
3099        /* test Abbr sign and Anudatta */
3100        0x0970, 0x952,
3101       /* 0x0958,
3102        0x0959,
3103        0x095A,
3104        0x095B,
3105        0x095C,
3106        0x095D,
3107        0x095E,
3108        0x095F,*/
3109        0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3110        0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3111        0x090C ,
3112        0x0962,
3113        0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3114        0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3115        0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3116        0x093D /* Avagraha  0xEA, 0xE9*/,
3117        0x0958,
3118        0x0959,
3119        0x095A,
3120        0x095B,
3121        0x095C,
3122        0x095D,
3123        0x095E,
3124        0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3125      };
3126    static const unsigned char byteArr[]={
3127
3128        0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3129        0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3130        0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3131        0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3132        0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3133        0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3134        0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3135        0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3136        0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3137        0xf8,0xf9,0xfa,
3138        /* test soft halant */
3139        0xb3, 0xE8, 0xE9,
3140        /* test explicit halant */
3141        0xb3, 0xE8, 0xE8,
3142        /* test double danda */
3143        0xea, 0xea,
3144        /* test ASCII */
3145        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3146        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3147        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3148        /* test ATR code */
3149
3150        /* tests from Lotus */
3151        0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3152        0xEF,0x42,0xCF,0xE8,0xD9,
3153        0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3154        0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3155        /* tamil range */
3156        0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3157        /* telugu range */
3158        0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3159        /* kannada range */
3160        0xEF, 0x48,0xa4, 0xa2, 0xa3,
3161        /* anudatta and abbreviation sign */
3162        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3163
3164
3165        0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3166
3167        0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3168
3169        0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3170
3171        0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3172
3173        0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3174
3175        0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3176
3177        0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3178
3179        0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3180
3181        0xB3, 0xE9, /* Ka + NUKTA */
3182
3183        0xB4, 0xE9, /* Kha + NUKTA */
3184
3185        0xB5, 0xE9, /* Ga + NUKTA */
3186
3187        0xBA, 0xE9,
3188
3189        0xBF, 0xE9,
3190
3191        0xC0, 0xE9,
3192
3193        0xC9, 0xE9,
3194        /* INV halant RA    */
3195        0xD9, 0xE8, 0xCF,
3196        0x00, 0x00A0,
3197        /* just consume unhandled codepoints */
3198        0xEF, 0x30,
3199
3200    };
3201    testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3202    TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3203
3204}
3205
3206static void
3207TestISO_2022_JP() {
3208    /* test input */
3209    static const uint16_t in[]={
3210        0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3211        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3212        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3213        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3214        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3215        0x201D, 0x3014, 0x000D, 0x000A,
3216        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3217        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3218        };
3219    const UChar* uSource;
3220    const UChar* uSourceLimit;
3221    const char* cSource;
3222    const char* cSourceLimit;
3223    UChar *uTargetLimit =NULL;
3224    UChar *uTarget;
3225    char *cTarget;
3226    const char *cTargetLimit;
3227    char *cBuf;
3228    UChar *uBuf,*test;
3229    int32_t uBufSize = 120;
3230    UErrorCode errorCode=U_ZERO_ERROR;
3231    UConverter *cnv;
3232    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3233    int32_t* myOff= offsets;
3234    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3235    if(U_FAILURE(errorCode)) {
3236        log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3237        return;
3238    }
3239
3240    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3241    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3242    uSource = (const UChar*)in;
3243    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3244    cTarget = cBuf;
3245    cTargetLimit = cBuf +uBufSize*5;
3246    uTarget = uBuf;
3247    uTargetLimit = uBuf+ uBufSize*5;
3248    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3249    if(U_FAILURE(errorCode)){
3250        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3251        return;
3252    }
3253    cSource = cBuf;
3254    cSourceLimit =cTarget;
3255    test =uBuf;
3256    myOff=offsets;
3257    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3258    if(U_FAILURE(errorCode)){
3259        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3260        return;
3261    }
3262
3263    uSource = (const UChar*)in;
3264    while(uSource<uSourceLimit){
3265        if(*test!=*uSource){
3266
3267            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3268        }
3269        uSource++;
3270        test++;
3271    }
3272
3273    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3274    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3275    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3276    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3277    TestJitterbug930("csISO2022JP");
3278    ucnv_close(cnv);
3279    free(uBuf);
3280    free(cBuf);
3281    free(offsets);
3282}
3283
3284static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3285    const UChar* uSource;
3286    const UChar* uSourceLimit;
3287    const char* cSource;
3288    const char* cSourceLimit;
3289    UChar *uTargetLimit =NULL;
3290    UChar *uTarget;
3291    char *cTarget;
3292    const char *cTargetLimit;
3293    char *cBuf;
3294    UChar *uBuf,*test;
3295    int32_t uBufSize = 120*10;
3296    UErrorCode errorCode=U_ZERO_ERROR;
3297    UConverter *cnv;
3298    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3299    int32_t* myOff= offsets;
3300    cnv=my_ucnv_open(conv, &errorCode);
3301    if(U_FAILURE(errorCode)) {
3302        log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3303        return;
3304    }
3305
3306    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3307    cBuf =(char*)malloc(uBufSize * sizeof(char));
3308    uSource = (const UChar*)in;
3309    uSourceLimit=uSource+len;
3310    cTarget = cBuf;
3311    cTargetLimit = cBuf +uBufSize;
3312    uTarget = uBuf;
3313    uTargetLimit = uBuf+ uBufSize;
3314    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3315    if(U_FAILURE(errorCode)){
3316        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3317        return;
3318    }
3319    /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3320    cSource = cBuf;
3321    cSourceLimit =cTarget;
3322    test =uBuf;
3323    myOff=offsets;
3324    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3325    if(U_FAILURE(errorCode)){
3326        log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3327        return;
3328    }
3329
3330    uSource = (const UChar*)in;
3331    while(uSource<uSourceLimit){
3332        if(*test!=*uSource){
3333            log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3334        }
3335        uSource++;
3336        test++;
3337    }
3338    TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3339    TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3340    TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3341    if(byteArr && byteArrLen!=0){
3342        TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3343        TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3344        {
3345            cSource = byteArr;
3346            cSourceLimit = cSource+byteArrLen;
3347            test=uBuf;
3348            myOff = offsets;
3349            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3350            if(U_FAILURE(errorCode)){
3351                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3352                return;
3353            }
3354
3355            uSource = (const UChar*)in;
3356            while(uSource<uSourceLimit){
3357                if(*test!=*uSource){
3358                    log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3359                }
3360                uSource++;
3361                test++;
3362            }
3363        }
3364    }
3365
3366    ucnv_close(cnv);
3367    free(uBuf);
3368    free(cBuf);
3369    free(offsets);
3370}
3371static UChar U_CALLCONV
3372_charAt(int32_t offset, void *context) {
3373    return ((char*)context)[offset];
3374}
3375
3376static int32_t
3377unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3378    int32_t srcIndex=0;
3379    int32_t dstIndex=0;
3380    if(U_FAILURE(*status)){
3381        return 0;
3382    }
3383    if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3384        *status = U_ILLEGAL_ARGUMENT_ERROR;
3385        return 0;
3386    }
3387    if(srcLen==-1){
3388        srcLen = (int32_t)uprv_strlen(src);
3389    }
3390
3391    for (; srcIndex<srcLen; ) {
3392        UChar32 c = src[srcIndex++];
3393        if (c == 0x005C /*'\\'*/) {
3394            c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3395            if (c == (UChar32)0xFFFFFFFF) {
3396                *status=U_INVALID_CHAR_FOUND; /* return empty string */
3397                break; /* invalid escape sequence */
3398            }
3399        }
3400        if(dstIndex < dstLen){
3401            if(c>0xFFFF){
3402               dst[dstIndex++] = UTF16_LEAD(c);
3403               if(dstIndex<dstLen){
3404                    dst[dstIndex]=UTF16_TRAIL(c);
3405               }else{
3406                   *status=U_BUFFER_OVERFLOW_ERROR;
3407               }
3408            }else{
3409                dst[dstIndex]=(UChar)c;
3410            }
3411
3412        }else{
3413            *status = U_BUFFER_OVERFLOW_ERROR;
3414        }
3415        dstIndex++; /* for preflighting */
3416    }
3417    return dstIndex;
3418}
3419
3420static void
3421TestFullRoundtrip(const char* cp){
3422    UChar usource[10] ={0};
3423    UChar nsrc[10] = {0};
3424    uint32_t i=1;
3425    int len=0, ulen;
3426    nsrc[0]=0x0061;
3427    /* Test codepoint 0 */
3428    TestConv(usource,1,cp,"",NULL,0);
3429    TestConv(usource,2,cp,"",NULL,0);
3430    nsrc[2]=0x5555;
3431    TestConv(nsrc,3,cp,"",NULL,0);
3432
3433    for(;i<=0x10FFFF;i++){
3434        if(i==0xD800){
3435            i=0xDFFF;
3436            continue;
3437        }
3438        if(i<=0xFFFF){
3439            usource[0] =(UChar) i;
3440            len=1;
3441        }else{
3442            usource[0]=UTF16_LEAD(i);
3443            usource[1]=UTF16_TRAIL(i);
3444            len=2;
3445        }
3446        ulen=len;
3447        if(i==0x80) {
3448            usource[2]=0;
3449        }
3450        /* Test only single code points */
3451        TestConv(usource,ulen,cp,"",NULL,0);
3452        /* Test codepoint repeated twice */
3453        usource[ulen]=usource[0];
3454        usource[ulen+1]=usource[1];
3455        ulen+=len;
3456        TestConv(usource,ulen,cp,"",NULL,0);
3457        /* Test codepoint repeated 3 times */
3458        usource[ulen]=usource[0];
3459        usource[ulen+1]=usource[1];
3460        ulen+=len;
3461        TestConv(usource,ulen,cp,"",NULL,0);
3462        /* Test codepoint in between 2 codepoints */
3463        nsrc[1]=usource[0];
3464        nsrc[2]=usource[1];
3465        nsrc[len+1]=0x5555;
3466        TestConv(nsrc,len+2,cp,"",NULL,0);
3467        uprv_memset(usource,0,sizeof(UChar)*10);
3468    }
3469}
3470
3471static void
3472TestRoundTrippingAllUTF(void){
3473    if(!QUICK){
3474        log_verbose("Running exhaustive round trip test for BOCU-1\n");
3475        TestFullRoundtrip("BOCU-1");
3476        log_verbose("Running exhaustive round trip test for SCSU\n");
3477        TestFullRoundtrip("SCSU");
3478        log_verbose("Running exhaustive round trip test for UTF-8\n");
3479        TestFullRoundtrip("UTF-8");
3480        log_verbose("Running exhaustive round trip test for CESU-8\n");
3481        TestFullRoundtrip("CESU-8");
3482        log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3483        TestFullRoundtrip("UTF-16BE");
3484        log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3485        TestFullRoundtrip("UTF-16LE");
3486        log_verbose("Running exhaustive round trip test for UTF-16\n");
3487        TestFullRoundtrip("UTF-16");
3488        log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3489        TestFullRoundtrip("UTF-32BE");
3490        log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3491        TestFullRoundtrip("UTF-32LE");
3492        log_verbose("Running exhaustive round trip test for UTF-32\n");
3493        TestFullRoundtrip("UTF-32");
3494        log_verbose("Running exhaustive round trip test for UTF-7\n");
3495        TestFullRoundtrip("UTF-7");
3496        log_verbose("Running exhaustive round trip test for UTF-7\n");
3497        TestFullRoundtrip("UTF-7,version=1");
3498        log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3499        TestFullRoundtrip("IMAP-mailbox-name");
3500        log_verbose("Running exhaustive round trip test for GB18030\n");
3501        TestFullRoundtrip("GB18030");
3502    }
3503}
3504
3505static void
3506TestSCSU() {
3507
3508    static const uint16_t germanUTF16[]={
3509        0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3510    };
3511
3512    static const uint8_t germanSCSU[]={
3513        0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3514    };
3515
3516    static const uint16_t russianUTF16[]={
3517        0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3518    };
3519
3520    static const uint8_t russianSCSU[]={
3521        0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3522    };
3523
3524    static const uint16_t japaneseUTF16[]={
3525        0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3526        0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3527        0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3528        0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3529        0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3530        0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3531        0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3532        0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3533        0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3534        0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3535        0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3536        0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3537        0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3538        0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3539        0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3540    };
3541
3542    /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3543     it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3544    static const uint8_t japaneseSCSU[]={
3545        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3546        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3547        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3548        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3549        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3550        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3551        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3552        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3553        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3554        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3555        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3556        0xcb, 0x82
3557    };
3558
3559    static const uint16_t allFeaturesUTF16[]={
3560        0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3561        0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3562        0x01df, 0xf000, 0xdbff, 0xdfff
3563    };
3564
3565    /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3566     * result here (34B vs. 35B)
3567     */
3568    static const uint8_t allFeaturesSCSU[]={
3569        0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3570        0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3571        0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3572        0xdf, 0x14, 0x80, 0x15, 0xff
3573    };
3574    static const uint16_t monkeyIn[]={
3575        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3576        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3577        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3578        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3579        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3580        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3581        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3582        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3583        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3584        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3585        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3586        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3587        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3588        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3589        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3590        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3591        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3592        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3593        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3594        /* test non-BMP code points */
3595        0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3596        0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3597        0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3598        0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3599        0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3600        0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3601        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3602        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3603        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3604        0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3605        0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3606
3607
3608        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3609        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3610        0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3611        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3612        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3613    };
3614    static const char *fTestCases [] = {
3615          "\\ud800\\udc00", /* smallest surrogate*/
3616          "\\ud8ff\\udcff",
3617          "\\udBff\\udFff", /* largest surrogate pair*/
3618          "\\ud834\\udc00",
3619          "\\U0010FFFF",
3620          "Hello \\u9292 \\u9192 World!",
3621          "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3622          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3623
3624          "\\u0648\\u06c8", /* catch missing reset*/
3625          "\\u0648\\u06c8",
3626
3627          "\\u4444\\uE001", /* lowest quotable*/
3628          "\\u4444\\uf2FF", /* highest quotable*/
3629          "\\u4444\\uf188\\u4444",
3630          "\\u4444\\uf188\\uf288",
3631          "\\u4444\\uf188abc\\u0429\\uf288",
3632          "\\u9292\\u2222",
3633          "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3634          "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3635          "Hello World!123456",
3636          "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3637
3638          "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3639          "abc\\u4411d",      /* uses SQU*/
3640          "abc\\u4411\\u4412d",/* uses SCU*/
3641          "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3642          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3643          "\\u9292\\u2222",
3644          "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3645          "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3646          "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3647
3648          "", /* empty input*/
3649          "\\u0000", /* smallest BMP character*/
3650          "\\uFFFF", /* largest BMP character*/
3651
3652          /* regression tests*/
3653          "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3654          "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3655          "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3656          "\\u0041\\u00df\\u0401\\u015f",
3657          "\\u9066\\u2123abc",
3658          "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3659          "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3660    };
3661    int i=0;
3662    for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3663        const char* cSrc = fTestCases[i];
3664        UErrorCode status = U_ZERO_ERROR;
3665        int32_t cSrcLen,srcLen;
3666        UChar* src;
3667        /* UConverter* cnv = ucnv_open("SCSU",&status); */
3668        cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3669        src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3670        srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3671        log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3672        TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3673        free(src);
3674    }
3675    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3676    TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3677    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3678    TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3679    TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3680    TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3681    TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3682}
3683
3684#if !UCONFIG_NO_LEGACY_CONVERSION
3685static void TestJitterbug2346(){
3686    char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3687                      0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3688    uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3689
3690    UChar uTarget[500]={'\0'};
3691    UChar* utarget=uTarget;
3692    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3693
3694    char cTarget[500]={'\0'};
3695    char* ctarget=cTarget;
3696    char* ctargetLimit=cTarget+sizeof(cTarget);
3697    const char* csource=source;
3698    UChar* temp = expected;
3699    UErrorCode err=U_ZERO_ERROR;
3700
3701    UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3702    if(U_FAILURE(err)) {
3703        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3704        return;
3705    }
3706    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3707    if(U_FAILURE(err)) {
3708        log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3709        return;
3710    }
3711    utargetLimit=utarget;
3712    utarget = uTarget;
3713    while(utarget<utargetLimit){
3714        if(*temp!=*utarget){
3715
3716            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3717        }
3718        utarget++;
3719        temp++;
3720    }
3721    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3722    if(U_FAILURE(err)) {
3723        log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3724        return;
3725    }
3726    ctargetLimit=ctarget;
3727    ctarget =cTarget;
3728    ucnv_close(conv);
3729
3730
3731}
3732
3733static void
3734TestISO_2022_JP_1() {
3735    /* test input */
3736    static const uint16_t in[]={
3737        0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3738        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3739        0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3740        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3741        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3742        0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3743        0x201D, 0x000D, 0x000A,
3744        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3745        0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3746        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3747        0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3748        0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3749        0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3750      };
3751    const UChar* uSource;
3752    const UChar* uSourceLimit;
3753    const char* cSource;
3754    const char* cSourceLimit;
3755    UChar *uTargetLimit =NULL;
3756    UChar *uTarget;
3757    char *cTarget;
3758    const char *cTargetLimit;
3759    char *cBuf;
3760    UChar *uBuf,*test;
3761    int32_t uBufSize = 120;
3762    UErrorCode errorCode=U_ZERO_ERROR;
3763    UConverter *cnv;
3764
3765    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3766    if(U_FAILURE(errorCode)) {
3767        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3768        return;
3769    }
3770
3771    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3772    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3773    uSource = (const UChar*)in;
3774    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3775    cTarget = cBuf;
3776    cTargetLimit = cBuf +uBufSize*5;
3777    uTarget = uBuf;
3778    uTargetLimit = uBuf+ uBufSize*5;
3779    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3780    if(U_FAILURE(errorCode)){
3781        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3782        return;
3783    }
3784    cSource = cBuf;
3785    cSourceLimit =cTarget;
3786    test =uBuf;
3787    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3788    if(U_FAILURE(errorCode)){
3789        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3790        return;
3791    }
3792    uSource = (const UChar*)in;
3793    while(uSource<uSourceLimit){
3794        if(*test!=*uSource){
3795
3796            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3797        }
3798        uSource++;
3799        test++;
3800    }
3801    /*ucnv_close(cnv);
3802    cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3803    /*Test for the condition where there is an invalid character*/
3804    ucnv_reset(cnv);
3805    {
3806        static const uint8_t source2[]={0x0e,0x24,0x053};
3807        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3808    }
3809    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3810    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3811    ucnv_close(cnv);
3812    free(uBuf);
3813    free(cBuf);
3814}
3815
3816static void
3817TestISO_2022_JP_2() {
3818    /* test input */
3819    static const uint16_t in[]={
3820        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3821        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3822        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3823        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3824        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3825        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3826        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3827        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3828        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3829        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3830        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3831        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3832        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3833        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3834        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3835        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3836        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3837        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3838        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3839      };
3840    const UChar* uSource;
3841    const UChar* uSourceLimit;
3842    const char* cSource;
3843    const char* cSourceLimit;
3844    UChar *uTargetLimit =NULL;
3845    UChar *uTarget;
3846    char *cTarget;
3847    const char *cTargetLimit;
3848    char *cBuf;
3849    UChar *uBuf,*test;
3850    int32_t uBufSize = 120;
3851    UErrorCode errorCode=U_ZERO_ERROR;
3852    UConverter *cnv;
3853    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3854    int32_t* myOff= offsets;
3855    cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3856    if(U_FAILURE(errorCode)) {
3857        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3858        return;
3859    }
3860
3861    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3862    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3863    uSource = (const UChar*)in;
3864    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3865    cTarget = cBuf;
3866    cTargetLimit = cBuf +uBufSize*5;
3867    uTarget = uBuf;
3868    uTargetLimit = uBuf+ uBufSize*5;
3869    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3870    if(U_FAILURE(errorCode)){
3871        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3872        return;
3873    }
3874    cSource = cBuf;
3875    cSourceLimit =cTarget;
3876    test =uBuf;
3877    myOff=offsets;
3878    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3879    if(U_FAILURE(errorCode)){
3880        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3881        return;
3882    }
3883    uSource = (const UChar*)in;
3884    while(uSource<uSourceLimit){
3885        if(*test!=*uSource){
3886
3887            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3888        }
3889        uSource++;
3890        test++;
3891    }
3892    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3893    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3894    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3895    /*Test for the condition where there is an invalid character*/
3896    ucnv_reset(cnv);
3897    {
3898        static const uint8_t source2[]={0x0e,0x24,0x053};
3899        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3900    }
3901    ucnv_close(cnv);
3902    free(uBuf);
3903    free(cBuf);
3904    free(offsets);
3905}
3906
3907static void
3908TestISO_2022_KR() {
3909    /* test input */
3910    static const uint16_t in[]={
3911                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3912                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3913                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3914                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3915                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3916                   ,0x53E3,0x53E4,0x000A,0x000D};
3917    const UChar* uSource;
3918    const UChar* uSourceLimit;
3919    const char* cSource;
3920    const char* cSourceLimit;
3921    UChar *uTargetLimit =NULL;
3922    UChar *uTarget;
3923    char *cTarget;
3924    const char *cTargetLimit;
3925    char *cBuf;
3926    UChar *uBuf,*test;
3927    int32_t uBufSize = 120;
3928    UErrorCode errorCode=U_ZERO_ERROR;
3929    UConverter *cnv;
3930    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3931    int32_t* myOff= offsets;
3932    cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
3933    if(U_FAILURE(errorCode)) {
3934        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3935        return;
3936    }
3937
3938    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3939    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3940    uSource = (const UChar*)in;
3941    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3942    cTarget = cBuf;
3943    cTargetLimit = cBuf +uBufSize*5;
3944    uTarget = uBuf;
3945    uTargetLimit = uBuf+ uBufSize*5;
3946    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3947    if(U_FAILURE(errorCode)){
3948        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3949        return;
3950    }
3951    cSource = cBuf;
3952    cSourceLimit =cTarget;
3953    test =uBuf;
3954    myOff=offsets;
3955    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3956    if(U_FAILURE(errorCode)){
3957        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3958        return;
3959    }
3960    uSource = (const UChar*)in;
3961    while(uSource<uSourceLimit){
3962        if(*test!=*uSource){
3963            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
3964        }
3965        uSource++;
3966        test++;
3967    }
3968    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
3969    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3970    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3971    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3972    TestJitterbug930("csISO2022KR");
3973    /*Test for the condition where there is an invalid character*/
3974    ucnv_reset(cnv);
3975    {
3976        static const uint8_t source2[]={0x1b,0x24,0x053};
3977        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
3978        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
3979    }
3980    ucnv_close(cnv);
3981    free(uBuf);
3982    free(cBuf);
3983    free(offsets);
3984}
3985
3986static void
3987TestISO_2022_KR_1() {
3988    /* test input */
3989    static const uint16_t in[]={
3990                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3991                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3992                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3993                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3994                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3995                   ,0x53E3,0x53E4,0x000A,0x000D};
3996    const UChar* uSource;
3997    const UChar* uSourceLimit;
3998    const char* cSource;
3999    const char* cSourceLimit;
4000    UChar *uTargetLimit =NULL;
4001    UChar *uTarget;
4002    char *cTarget;
4003    const char *cTargetLimit;
4004    char *cBuf;
4005    UChar *uBuf,*test;
4006    int32_t uBufSize = 120;
4007    UErrorCode errorCode=U_ZERO_ERROR;
4008    UConverter *cnv;
4009    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4010    int32_t* myOff= offsets;
4011    cnv=ucnv_open("ibm-25546", &errorCode);
4012    if(U_FAILURE(errorCode)) {
4013        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4014        return;
4015    }
4016
4017    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4018    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4019    uSource = (const UChar*)in;
4020    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4021    cTarget = cBuf;
4022    cTargetLimit = cBuf +uBufSize*5;
4023    uTarget = uBuf;
4024    uTargetLimit = uBuf+ uBufSize*5;
4025    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4026    if(U_FAILURE(errorCode)){
4027        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4028        return;
4029    }
4030    cSource = cBuf;
4031    cSourceLimit =cTarget;
4032    test =uBuf;
4033    myOff=offsets;
4034    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4035    if(U_FAILURE(errorCode)){
4036        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4037        return;
4038    }
4039    uSource = (const UChar*)in;
4040    while(uSource<uSourceLimit){
4041        if(*test!=*uSource){
4042            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4043        }
4044        uSource++;
4045        test++;
4046    }
4047    ucnv_reset(cnv);
4048    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4049    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4050    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4051    ucnv_reset(cnv);
4052    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4053        /*Test for the condition where there is an invalid character*/
4054    ucnv_reset(cnv);
4055    {
4056        static const uint8_t source2[]={0x1b,0x24,0x053};
4057        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4058        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4059    }
4060    ucnv_close(cnv);
4061    free(uBuf);
4062    free(cBuf);
4063    free(offsets);
4064}
4065
4066static void TestJitterbug2411(){
4067    static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4068                         "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4069    UConverter* kr=NULL, *kr1=NULL;
4070    UErrorCode errorCode = U_ZERO_ERROR;
4071    UChar tgt[100]={'\0'};
4072    UChar* target = tgt;
4073    UChar* targetLimit = target+100;
4074    kr=ucnv_open("iso-2022-kr", &errorCode);
4075    if(U_FAILURE(errorCode)) {
4076        log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4077        return;
4078    }
4079    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4080    if(U_FAILURE(errorCode)) {
4081        log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4082        return;
4083    }
4084    kr1 = ucnv_open("ibm-25546", &errorCode);
4085    if(U_FAILURE(errorCode)) {
4086        log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4087        return;
4088    }
4089    target = tgt;
4090    targetLimit = target+100;
4091    ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4092
4093    if(U_FAILURE(errorCode)) {
4094        log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4095        return;
4096    }
4097
4098    ucnv_close(kr);
4099    ucnv_close(kr1);
4100
4101}
4102
4103static void
4104TestJIS(){
4105    /* From Unicode moved to testdata/conversion.txt */
4106    /*To Unicode*/
4107    {
4108        static const uint8_t sampleTextJIS[] = {
4109            0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4110            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4111            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4112        };
4113        static const uint16_t expectedISO2022JIS[] = {
4114            0x0041, 0x0042,
4115            0xFF81, 0xFF82,
4116            0x3000
4117        };
4118        static const int32_t  toISO2022JISOffs[]={
4119            3,4,
4120            8,9,
4121            16
4122        };
4123
4124        static const uint8_t sampleTextJIS7[] = {
4125            0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4126            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4127            0x1b,0x24,0x42,0x21,0x21,
4128            0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4129            0x21,0x22,
4130            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4131        };
4132        static const uint16_t expectedISO2022JIS7[] = {
4133            0x0041, 0x0042,
4134            0xFF81, 0xFF82,
4135            0x3000,
4136            0xFF81, 0xFF82,
4137            0x3001,
4138            0x3000
4139        };
4140        static const int32_t  toISO2022JIS7Offs[]={
4141            3,4,
4142            8,9,
4143            13,16,
4144            17,
4145            19,27
4146        };
4147        static const uint8_t sampleTextJIS8[] = {
4148            0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4149            0xa1,0xc8,0xd9,/*Katakana Set*/
4150            0x1b,0x28,0x42,
4151            0x41,0x42,
4152            0xb1,0xc3, /*Katakana Set*/
4153            0x1b,0x24,0x42,0x21,0x21
4154        };
4155        static const uint16_t expectedISO2022JIS8[] = {
4156            0x0041, 0x0042,
4157            0xff61, 0xff88, 0xff99,
4158            0x0041, 0x0042,
4159            0xff71, 0xff83,
4160            0x3000
4161        };
4162        static const int32_t  toISO2022JIS8Offs[]={
4163            3, 4,  5,  6,
4164            7, 11, 12, 13,
4165            14, 18,
4166        };
4167
4168        testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4169            sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4170        testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4171            sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4172        testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4173            sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4174    }
4175
4176}
4177
4178static void TestJitterbug915(){
4179/* tests for roundtripping of the below sequence
4180\x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4181\x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4182\x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4183\x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4184\x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4185\x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4186\x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4187*/
4188    static const char cSource[]={
4189        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4190        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4191        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4192        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4193        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4194        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4195        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4196        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4197        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4198        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4199        0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4200        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4201        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4202        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4203        0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4204        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4205        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4206        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4207        0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4208        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4209        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4210        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4211        0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4212        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4213        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4214        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4215        0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4216        0x37, 0x20, 0x2A, 0x2F
4217    };
4218    UChar uTarget[500]={'\0'};
4219    UChar* utarget=uTarget;
4220    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4221
4222    char cTarget[500]={'\0'};
4223    char* ctarget=cTarget;
4224    char* ctargetLimit=cTarget+sizeof(cTarget);
4225    const char* csource=cSource;
4226    const char* tempSrc = cSource;
4227    UErrorCode err=U_ZERO_ERROR;
4228
4229    UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4230    if(U_FAILURE(err)) {
4231        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4232        return;
4233    }
4234    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4235    if(U_FAILURE(err)) {
4236        log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4237        return;
4238    }
4239    utargetLimit=utarget;
4240    utarget = uTarget;
4241    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4242    if(U_FAILURE(err)) {
4243        log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4244        return;
4245    }
4246    ctargetLimit=ctarget;
4247    ctarget =cTarget;
4248    while(ctarget<ctargetLimit){
4249        if(*ctarget != *tempSrc){
4250            log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4251        }
4252        ++ctarget;
4253        ++tempSrc;
4254    }
4255
4256    ucnv_close(conv);
4257}
4258
4259static void
4260TestISO_2022_CN_EXT() {
4261    /* test input */
4262    static const uint16_t in[]={
4263                /* test Non-BMP code points */
4264         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4265         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4266         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4267         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4268         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4269         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4270         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4271         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4272         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4273         0xD869, 0xDED5,
4274
4275         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4276         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4277         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4278         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4279         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4280         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4281         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4282         0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4283         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4284         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4285         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4286         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4287         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4288         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4289         0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4290         0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4291         0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4292         0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4293
4294         0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4295
4296      };
4297
4298    const UChar* uSource;
4299    const UChar* uSourceLimit;
4300    const char* cSource;
4301    const char* cSourceLimit;
4302    UChar *uTargetLimit =NULL;
4303    UChar *uTarget;
4304    char *cTarget;
4305    const char *cTargetLimit;
4306    char *cBuf;
4307    UChar *uBuf,*test;
4308    int32_t uBufSize = 180;
4309    UErrorCode errorCode=U_ZERO_ERROR;
4310    UConverter *cnv;
4311    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4312    int32_t* myOff= offsets;
4313    cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4314    if(U_FAILURE(errorCode)) {
4315        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4316        return;
4317    }
4318
4319    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4320    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4321    uSource = (const UChar*)in;
4322    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4323    cTarget = cBuf;
4324    cTargetLimit = cBuf +uBufSize*5;
4325    uTarget = uBuf;
4326    uTargetLimit = uBuf+ uBufSize*5;
4327    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4328    if(U_FAILURE(errorCode)){
4329        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4330        return;
4331    }
4332    cSource = cBuf;
4333    cSourceLimit =cTarget;
4334    test =uBuf;
4335    myOff=offsets;
4336    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4337    if(U_FAILURE(errorCode)){
4338        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4339        return;
4340    }
4341    uSource = (const UChar*)in;
4342    while(uSource<uSourceLimit){
4343        if(*test!=*uSource){
4344            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4345        }
4346        else{
4347            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4348        }
4349        uSource++;
4350        test++;
4351    }
4352    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4353    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4354    /*Test for the condition where there is an invalid character*/
4355    ucnv_reset(cnv);
4356    {
4357        static const uint8_t source2[]={0x0e,0x24,0x053};
4358        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4359    }
4360    ucnv_close(cnv);
4361    free(uBuf);
4362    free(cBuf);
4363    free(offsets);
4364}
4365
4366static void
4367TestISO_2022_CN() {
4368    /* test input */
4369    static const uint16_t in[]={
4370         /* jitterbug 951 */
4371         0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4372         0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4373         0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4374         0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4375         0x0020, 0x0045, 0x004e, 0x0044,
4376         /**/
4377         0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4378         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4379         0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4380         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4381         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4382         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4383         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4384         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4385         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4386         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4387         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4388         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4389         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4390         0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4391         0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4392         0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4393         0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4394
4395      };
4396    const UChar* uSource;
4397    const UChar* uSourceLimit;
4398    const char* cSource;
4399    const char* cSourceLimit;
4400    UChar *uTargetLimit =NULL;
4401    UChar *uTarget;
4402    char *cTarget;
4403    const char *cTargetLimit;
4404    char *cBuf;
4405    UChar *uBuf,*test;
4406    int32_t uBufSize = 180;
4407    UErrorCode errorCode=U_ZERO_ERROR;
4408    UConverter *cnv;
4409    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4410    int32_t* myOff= offsets;
4411    cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4412    if(U_FAILURE(errorCode)) {
4413        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4414        return;
4415    }
4416
4417    uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4418    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4419    uSource = (const UChar*)in;
4420    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4421    cTarget = cBuf;
4422    cTargetLimit = cBuf +uBufSize*5;
4423    uTarget = uBuf;
4424    uTargetLimit = uBuf+ uBufSize*5;
4425    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4426    if(U_FAILURE(errorCode)){
4427        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4428        return;
4429    }
4430    cSource = cBuf;
4431    cSourceLimit =cTarget;
4432    test =uBuf;
4433    myOff=offsets;
4434    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4435    if(U_FAILURE(errorCode)){
4436        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4437        return;
4438    }
4439    uSource = (const UChar*)in;
4440    while(uSource<uSourceLimit){
4441        if(*test!=*uSource){
4442            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4443        }
4444        else{
4445            log_verbose("      Got: \\u%04X\n",(int)*test) ;
4446        }
4447        uSource++;
4448        test++;
4449    }
4450    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4451    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4452    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4453    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4454    TestJitterbug930("csISO2022CN");
4455    /*Test for the condition where there is an invalid character*/
4456    ucnv_reset(cnv);
4457    {
4458        static const uint8_t source2[]={0x0e,0x24,0x053};
4459        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4460    }
4461
4462    ucnv_close(cnv);
4463    free(uBuf);
4464    free(cBuf);
4465    free(offsets);
4466}
4467
/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
/* One table entry for TestJitterbug6175: a converter name plus the bytes to feed it. */
typedef struct {
    const char *    converterName;      /* name passed to ucnv_open(); NULL marks the end of the table */
    const char *    inputText;          /* bytes to convert; not NUL-terminated, see inputTextLength */
    int             inputTextLength;    /* number of bytes in inputText */
} EmptySegmentTest;
4474
4475/* Callback for TestJitterbug6175, should only get called for empty segment errors */
4476static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4477                                             int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4478    if (reason > UCNV_IRREGULAR) {
4479        return;
4480    }
4481    if (reason != UCNV_IRREGULAR) {
4482        log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4483    }
4484    /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4485    *err = U_ZERO_ERROR;
4486    ucnv_cbToUWriteSub(toArgs,0,err);
4487}
4488
4489enum { kEmptySegmentToUCharsMax = 64 };
4490static void TestJitterbug6175(void) {
4491    static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4492    static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4493    static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4494    static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4495    static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4496    static const EmptySegmentTest emptySegmentTests[] = {
4497        /* converterName inputText    inputTextLength */
4498        { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4499        { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4500        { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4501        { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4502        { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4503        /* terminator: */
4504        { NULL,          NULL,        0,                  }
4505    };
4506    const EmptySegmentTest * testPtr;
4507    for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4508        UErrorCode   err = U_ZERO_ERROR;
4509        UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4510        if (U_FAILURE(err)) {
4511            log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4512            return;
4513        }
4514        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4515        if (U_FAILURE(err)) {
4516            log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4517            ucnv_close(cnv);
4518            return;
4519        }
4520        {
4521            UChar         toUChars[kEmptySegmentToUCharsMax];
4522            UChar *       toUCharsPtr = toUChars;
4523            const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4524            const char *  inCharsPtr = testPtr->inputText;
4525            const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4526            ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4527        }
4528        ucnv_close(cnv);
4529    }
4530}
4531
4532static void
4533TestEBCDIC_STATEFUL() {
4534    /* test input */
4535    static const uint8_t in[]={
4536        0x61,
4537        0x1a,
4538        0x0f, 0x4b,
4539        0x42,
4540        0x40,
4541        0x36,
4542    };
4543
4544    /* expected test results */
4545    static const int32_t results[]={
4546        /* number of bytes read, code point */
4547        1, 0x002f,
4548        1, 0x0092,
4549        2, 0x002e,
4550        1, 0xff62,
4551        1, 0x0020,
4552        1, 0x0096,
4553
4554    };
4555    static const uint8_t in2[]={
4556        0x0f,
4557        0xa1,
4558        0x01
4559    };
4560
4561    /* expected test results */
4562    static const int32_t results2[]={
4563        /* number of bytes read, code point */
4564        2, 0x203E,
4565        1, 0x0001,
4566    };
4567
4568    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4569    UErrorCode errorCode=U_ZERO_ERROR;
4570    UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4571    if(U_FAILURE(errorCode)) {
4572        log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4573        return;
4574    }
4575    TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4576    ucnv_reset(cnv);
4577     /* Test the condition when source >= sourceLimit */
4578    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4579    ucnv_reset(cnv);
4580    /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4581    {
4582        static const uint8_t source1[]={0x0f};
4583        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4584    }
4585    /*Test for the condition where there is an invalid character*/
4586    ucnv_reset(cnv);
4587    {
4588        static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4589        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4590    }
4591    ucnv_reset(cnv);
4592    source=(const char*)in2;
4593    limit=(const char*)in2+sizeof(in2);
4594    TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4595    ucnv_close(cnv);
4596
4597}
4598
4599static void
4600TestGB18030() {
4601    /* test input */
4602    static const uint8_t in[]={
4603        0x24,
4604        0x7f,
4605        0x81, 0x30, 0x81, 0x30,
4606        0xa8, 0xbf,
4607        0xa2, 0xe3,
4608        0xd2, 0xbb,
4609        0x82, 0x35, 0x8f, 0x33,
4610        0x84, 0x31, 0xa4, 0x39,
4611        0x90, 0x30, 0x81, 0x30,
4612        0xe3, 0x32, 0x9a, 0x35
4613#if 0
4614        /*
4615         * Feature removed   markus 2000-oct-26
4616         * Only some codepages must match surrogate pairs into supplementary code points -
4617         * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4618         * GB 18030 provides direct encodings for supplementary code points, therefore
4619         * it must not combine two single-encoded surrogates into one code point.
4620         */
4621        0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4622#endif
4623    };
4624
4625    /* expected test results */
4626    static const int32_t results[]={
4627        /* number of bytes read, code point */
4628        1, 0x24,
4629        1, 0x7f,
4630        4, 0x80,
4631        2, 0x1f9,
4632        2, 0x20ac,
4633        2, 0x4e00,
4634        4, 0x9fa6,
4635        4, 0xffff,
4636        4, 0x10000,
4637        4, 0x10ffff
4638#if 0
4639        /* Feature removed. See comment above. */
4640        8, 0x10000
4641#endif
4642    };
4643
4644/*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4645    UErrorCode errorCode=U_ZERO_ERROR;
4646    UConverter *cnv=ucnv_open("gb18030", &errorCode);
4647    if(U_FAILURE(errorCode)) {
4648        log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4649        return;
4650    }
4651    TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4652    ucnv_close(cnv);
4653}
4654
4655static void
4656TestLMBCS() {
4657    /* LMBCS-1 string */
4658    static const uint8_t pszLMBCS[]={
4659        0x61,
4660        0x01, 0x29,
4661        0x81,
4662        0xA0,
4663        0x0F, 0x27,
4664        0x0F, 0x91,
4665        0x14, 0x0a, 0x74,
4666        0x14, 0xF6, 0x02,
4667        0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4668        0x10, 0x88, 0xA0,
4669    };
4670
4671    /* Unicode UChar32 equivalents */
4672    static const UChar32 pszUnicode32[]={
4673        /* code point */
4674        0x00000061,
4675        0x00002013,
4676        0x000000FC,
4677        0x000000E1,
4678        0x00000007,
4679        0x00000091,
4680        0x00000a74,
4681        0x00000200,
4682        0x00023456, /* code point for surrogate pair */
4683        0x00005516
4684    };
4685
4686/* Unicode UChar equivalents */
4687    static const UChar pszUnicode[]={
4688        /* code point */
4689        0x0061,
4690        0x2013,
4691        0x00FC,
4692        0x00E1,
4693        0x0007,
4694        0x0091,
4695        0x0a74,
4696        0x0200,
4697        0xD84D, /* low surrogate */
4698        0xDC56, /* high surrogate */
4699        0x5516
4700    };
4701
4702/* expected test results */
4703    static const int offsets32[]={
4704        /* number of bytes read, code point */
4705        0,
4706        1,
4707        3,
4708        4,
4709        5,
4710        7,
4711        9,
4712        12,
4713        15,
4714        21,
4715        24
4716    };
4717
4718/* expected test results */
4719    static const int offsets[]={
4720        /* number of bytes read, code point */
4721        0,
4722        1,
4723        3,
4724        4,
4725        5,
4726        7,
4727        9,
4728        12,
4729        15,
4730        18,
4731        21,
4732        24
4733    };
4734
4735
4736    UConverter *cnv;
4737
4738#define NAME_LMBCS_1 "LMBCS-1"
4739#define NAME_LMBCS_2 "LMBCS-2"
4740
4741
4742   /* Some basic open/close/property tests on some LMBCS converters */
4743    {
4744
4745      char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4746      char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4747      char get_subchars [1];
4748      const char * get_name;
4749      UConverter *cnv1;
4750      UConverter *cnv2;
4751
4752      int8_t len = sizeof(get_subchars);
4753
4754      UErrorCode errorCode=U_ZERO_ERROR;
4755
4756      /* Open */
4757      cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4758      if(U_FAILURE(errorCode)) {
4759         log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4760         return;
4761      }
4762      cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4763      if(U_FAILURE(errorCode)) {
4764         log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4765         return;
4766      }
4767
4768      /* Name */
4769      get_name = ucnv_getName (cnv1, &errorCode);
4770      if (strcmp(NAME_LMBCS_1,get_name)){
4771         log_err("Unexpected converter name: %s\n", get_name);
4772      }
4773      get_name = ucnv_getName (cnv2, &errorCode);
4774      if (strcmp(NAME_LMBCS_2,get_name)){
4775         log_err("Unexpected converter name: %s\n", get_name);
4776      }
4777
4778      /* substitution chars */
4779      ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4780      if(U_FAILURE(errorCode)) {
4781         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4782      }
4783      if (len!=1){
4784         log_err("Unexpected length of sub chars\n");
4785      }
4786      if (get_subchars[0] != expected_subchars[0]){
4787           log_err("Unexpected value of sub chars\n");
4788      }
4789      ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4790      if(U_FAILURE(errorCode)) {
4791         log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4792      }
4793      ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4794      if(U_FAILURE(errorCode)) {
4795         log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4796      }
4797      if (len!=1){
4798         log_err("Unexpected length of sub chars\n");
4799      }
4800      if (get_subchars[0] != new_subchars[0]){
4801           log_err("Unexpected value of sub chars\n");
4802      }
4803      ucnv_close(cnv1);
4804      ucnv_close(cnv2);
4805
4806    }
4807
4808    /* LMBCS to Unicode - offsets */
4809    {
4810       UErrorCode errorCode=U_ZERO_ERROR;
4811
4812       const char * pSource = (const char *)pszLMBCS;
4813       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4814
4815       UChar Out [sizeof(pszUnicode) + 1];
4816       UChar * pOut = Out;
4817       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4818
4819       int32_t off [sizeof(offsets)];
4820
4821      /* last 'offset' in expected results is just the final size.
4822         (Makes other tests easier). Compensate here: */
4823
4824       off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4825
4826
4827
4828      cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4829      if(U_FAILURE(errorCode)) {
4830           log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4831           return;
4832      }
4833
4834
4835
4836      ucnv_toUnicode (cnv,
4837                      &pOut,
4838                      OutLimit,
4839                      &pSource,
4840                      sourceLimit,
4841                      off,
4842                      TRUE,
4843                      &errorCode);
4844
4845
4846       if (memcmp(off,offsets,sizeof(offsets)))
4847       {
4848         log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4849       }
4850       if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4851       {
4852         log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4853       }
4854       ucnv_close(cnv);
4855    }
4856    {
4857   /* LMBCS to Unicode - getNextUChar */
4858      const char * sourceStart;
4859      const char *source=(const char *)pszLMBCS;
4860      const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4861      const UChar32 *results= pszUnicode32;
4862      const int *off = offsets32;
4863
4864      UErrorCode errorCode=U_ZERO_ERROR;
4865      UChar32 uniChar;
4866
4867      cnv=ucnv_open("LMBCS-1", &errorCode);
4868      if(U_FAILURE(errorCode)) {
4869           log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4870           return;
4871      }
4872      else
4873      {
4874
4875         while(source<limit) {
4876            sourceStart=source;
4877            uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4878            if(U_FAILURE(errorCode)) {
4879                  log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4880                  break;
4881            } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4882               log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4883                   uniChar, (source-sourceStart), *results, *off);
4884               break;
4885            }
4886            results++;
4887            off++;
4888         }
4889       }
4890       ucnv_close(cnv);
4891    }
4892    { /* test locale & optimization group operations: Unicode to LMBCS */
4893
4894      UErrorCode errorCode=U_ZERO_ERROR;
4895      UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4896      UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4897      UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4898      UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4899      const UChar * pUniOut = uniString;
4900      UChar * pUniIn = uniString;
4901      uint8_t lmbcsString [4];
4902      const char * pLMBCSOut = (const char *)lmbcsString;
4903      char * pLMBCSIn = (char *)lmbcsString;
4904
4905      /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4906      ucnv_fromUnicode (cnv16he,
4907                        &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4908                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4909                        NULL, 1, &errorCode);
4910
4911      if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
4912      {
4913         log_err("LMBCS-16,locale=he gives unexpected translation\n");
4914      }
4915
4916      pLMBCSIn= (char *)lmbcsString;
4917      pUniOut = uniString;
4918      ucnv_fromUnicode (cnv01us,
4919                        &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4920                        &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4921                        NULL, 1, &errorCode);
4922
4923      if (lmbcsString[0] != 0x9F)
4924      {
4925         log_err("LMBCS-1,locale=US gives unexpected translation\n");
4926      }
4927
4928      /* single byte char from mbcs char set */
4929      lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
4930      pLMBCSOut = (const char *)lmbcsString;
4931      pUniIn = uniString;
4932      ucnv_toUnicode (cnv16jp,
4933                        &pUniIn, pUniIn + 1,
4934                        &pLMBCSOut, (pLMBCSOut + 1),
4935                        NULL, 1, &errorCode);
4936      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4937      {
4938           log_err("Unexpected results from LMBCS-16 single byte char\n");
4939      }
4940      /* convert to group 1: should be 3 bytes */
4941      pLMBCSIn = (char *)lmbcsString;
4942      pUniOut = uniString;
4943      ucnv_fromUnicode (cnv01us,
4944                        &pLMBCSIn, (const char *)(pLMBCSIn + 3),
4945                        &pUniOut, pUniOut + 1,
4946                        NULL, 1, &errorCode);
4947      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
4948         || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
4949      {
4950           log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
4951      }
4952      pLMBCSOut = (const char *)lmbcsString;
4953      pUniIn = uniString;
4954      ucnv_toUnicode (cnv01us,
4955                        &pUniIn, pUniIn + 1,
4956                        &pLMBCSOut, (const char *)(pLMBCSOut + 3),
4957                        NULL, 1, &errorCode);
4958      if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
4959      {
4960           log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
4961      }
4962      pLMBCSIn = (char *)lmbcsString;
4963      pUniOut = uniString;
4964      ucnv_fromUnicode (cnv16jp,
4965                        &pLMBCSIn, (const char *)(pLMBCSIn + 1),
4966                        &pUniOut, pUniOut + 1,
4967                        NULL, 1, &errorCode);
4968      if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
4969      {
4970           log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
4971      }
4972      ucnv_close(cnv16he);
4973      ucnv_close(cnv16jp);
4974      ucnv_close(cnv01us);
4975    }
4976    {
4977       /* Small source buffer testing, LMBCS -> Unicode */
4978
4979       UErrorCode errorCode=U_ZERO_ERROR;
4980
4981       const char * pSource = (const char *)pszLMBCS;
4982       const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4983       int codepointCount = 0;
4984
4985       UChar Out [sizeof(pszUnicode) + 1];
4986       UChar * pOut = Out;
4987       UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4988
4989
4990       cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
4991       if(U_FAILURE(errorCode)) {
4992           log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4993           return;
4994       }
4995
4996
4997       while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
4998       {
4999           ucnv_toUnicode (cnv,
5000               &pOut,
5001               OutLimit,
5002               &pSource,
5003               (pSource+1), /* claim that this is a 1- byte buffer */
5004               NULL,
5005               FALSE,    /* FALSE means there might be more chars in the next buffer */
5006               &errorCode);
5007
5008           if (U_SUCCESS (errorCode))
5009           {
5010               if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5011               {
5012                   /* we are on to the next code point: check value */
5013
5014                   if (Out[0] != pszUnicode[codepointCount]){
5015                       log_err("LMBCS->Uni result %lx should have been %lx \n",
5016                           Out[0], pszUnicode[codepointCount]);
5017                   }
5018
5019                   pOut = Out; /* reset for accumulating next code point */
5020                   codepointCount++;
5021               }
5022           }
5023           else
5024           {
5025               log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5026           }
5027       }
5028       {
5029         /* limits & surrogate error testing */
5030         char LIn [sizeof(pszLMBCS)];
5031         const char * pLIn = LIn;
5032
5033         char LOut [sizeof(pszLMBCS)];
5034         char * pLOut = LOut;
5035
5036         UChar UOut [sizeof(pszUnicode)];
5037         UChar * pUOut = UOut;
5038
5039         UChar UIn [sizeof(pszUnicode)];
5040         const UChar * pUIn = UIn;
5041
5042         int32_t off [sizeof(offsets)];
5043         UChar32 uniChar;
5044
5045         errorCode=U_ZERO_ERROR;
5046
5047         /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5048         ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode);
5049         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5050         {
5051            log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5052         }
5053         errorCode=U_ZERO_ERROR;
5054         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5055         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5056         {
5057            log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5058         }
5059         errorCode=U_ZERO_ERROR;
5060
5061         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5062         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5063         {
5064            log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5065         }
5066         errorCode=U_ZERO_ERROR;
5067
5068         /* 0 byte source request - no error, no pointer movement */
5069         ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5070         ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5071         if(U_FAILURE(errorCode)) {
5072            log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5073         }
5074         if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5075         {
5076              log_err("Unexpected pointer move in 0 byte source request \n");
5077         }
5078         /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5079         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5080         if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5081         {
5082            log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5083         }
5084         if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5085         {
5086            log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5087         }
5088         errorCode = U_ZERO_ERROR;
5089
5090         /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5091
5092         pUIn = pszUnicode;
5093         ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
5094         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5095         {
5096            log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5097         }
5098
5099         errorCode = U_ZERO_ERROR;
5100
5101         pLIn = (const char *)pszLMBCS;
5102         ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5103         if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5104         {
5105            log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5106         }
5107
5108         /* unpaired or chopped LMBCS surrogates */
5109
5110         /* OK high surrogate, Low surrogate is chopped */
5111         LIn [0] = (char)0x14;
5112         LIn [1] = (char)0xD8;
5113         LIn [2] = (char)0x01;
5114         LIn [3] = (char)0x14;
5115         LIn [4] = (char)0xDC;
5116         pLIn = LIn;
5117         errorCode = U_ZERO_ERROR;
5118         pUOut = UOut;
5119
5120         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5121         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5122         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5123         {
5124            log_err("Unexpected results on chopped low surrogate\n");
5125         }
5126
5127         /* chopped at surrogate boundary */
5128         LIn [0] = (char)0x14;
5129         LIn [1] = (char)0xD8;
5130         LIn [2] = (char)0x01;
5131         pLIn = LIn;
5132         errorCode = U_ZERO_ERROR;
5133         pUOut = UOut;
5134
5135         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5136         if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5137         {
5138            log_err("Unexpected results on chopped at surrogate boundary \n");
5139         }
5140
5141         /* unpaired surrogate plus valid Unichar */
5142         LIn [0] = (char)0x14;
5143         LIn [1] = (char)0xD8;
5144         LIn [2] = (char)0x01;
5145         LIn [3] = (char)0x14;
5146         LIn [4] = (char)0xC9;
5147         LIn [5] = (char)0xD0;
5148         pLIn = LIn;
5149         errorCode = U_ZERO_ERROR;
5150         pUOut = UOut;
5151
5152         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5153         if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5154         {
5155            log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5156         }
5157
5158      /* unpaired surrogate plus chopped Unichar */
5159         LIn [0] = (char)0x14;
5160         LIn [1] = (char)0xD8;
5161         LIn [2] = (char)0x01;
5162         LIn [3] = (char)0x14;
5163         LIn [4] = (char)0xC9;
5164
5165         pLIn = LIn;
5166         errorCode = U_ZERO_ERROR;
5167         pUOut = UOut;
5168
5169         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5170         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5171         {
5172            log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5173         }
5174
5175         /* unpaired surrogate plus valid non-Unichar */
5176         LIn [0] = (char)0x14;
5177         LIn [1] = (char)0xD8;
5178         LIn [2] = (char)0x01;
5179         LIn [3] = (char)0x0F;
5180         LIn [4] = (char)0x3B;
5181
5182         pLIn = LIn;
5183         errorCode = U_ZERO_ERROR;
5184         pUOut = UOut;
5185
5186         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5187         if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5188         {
5189            log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5190         }
5191
5192         /* unpaired surrogate plus chopped non-Unichar */
5193         LIn [0] = (char)0x14;
5194         LIn [1] = (char)0xD8;
5195         LIn [2] = (char)0x01;
5196         LIn [3] = (char)0x0F;
5197
5198         pLIn = LIn;
5199         errorCode = U_ZERO_ERROR;
5200         pUOut = UOut;
5201
5202         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5203
5204         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5205         {
5206            log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5207         }
5208       }
5209    }
5210   ucnv_close(cnv);  /* final cleanup */
5211}
5212
5213
5214static void TestJitterbug255()
5215{
5216    static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5217    const char *testBuffer = (const char *)testBytes;
5218    const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5219    UErrorCode status = U_ZERO_ERROR;
5220    UChar32 result;
5221    UConverter *cnv = 0;
5222
5223    cnv = ucnv_open("shift-jis", &status);
5224    if (U_FAILURE(status) || cnv == 0) {
5225        log_data_err("Failed to open the converter for SJIS.\n");
5226                return;
5227    }
5228    while (testBuffer != testEnd)
5229    {
5230        result = ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5231        if (U_FAILURE(status))
5232        {
5233            log_err("Failed to convert the next UChar for SJIS.\n");
5234            break;
5235        }
5236    }
5237    ucnv_close(cnv);
5238}
5239
5240static void TestEBCDICUS4XML()
5241{
5242    UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5243    static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5244    static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5245    static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5246    char target_x[] = {0x00, 0x00, 0x00, 0x00};
5247    UChar *unicodes = unicodes_x;
5248    const UChar *toUnicodeMaps = toUnicodeMaps_x;
5249    char *target = target_x;
5250    const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5251    UErrorCode status = U_ZERO_ERROR;
5252    UConverter *cnv = 0;
5253
5254    cnv = ucnv_open("ebcdic-xml-us", &status);
5255    if (U_FAILURE(status) || cnv == 0) {
5256        log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5257        return;
5258    }
5259    ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5260    if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5261        log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5262            u_errorName(status));
5263        printUSeqErr(unicodes_x, 3);
5264        printUSeqErr(toUnicodeMaps, 3);
5265    }
5266    status = U_ZERO_ERROR;
5267    ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5268    if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5269        log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5270            u_errorName(status));
5271        printSeqErr((const unsigned char*)target_x, 3);
5272        printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5273    }
5274    ucnv_close(cnv);
5275}
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5277
5278#if !UCONFIG_NO_COLLATION
5279
5280static void TestJitterbug981(){
5281    const UChar* rules;
5282    int32_t rules_length, target_cap, bytes_needed, buff_size;
5283    UErrorCode status = U_ZERO_ERROR;
5284    UConverter *utf8cnv;
5285    UCollator* myCollator;
5286    char *buff;
5287    int numNeeded=0;
5288    utf8cnv = ucnv_open ("utf8", &status);
5289    if(U_FAILURE(status)){
5290        log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5291        return;
5292    }
5293    myCollator = ucol_open("zh", &status);
5294    if(U_FAILURE(status)){
5295        log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5296        ucnv_close(utf8cnv);
5297        return;
5298    }
5299
5300    rules = ucol_getRules(myCollator, &rules_length);
5301    buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5302    buff = malloc(buff_size);
5303
5304    target_cap = 0;
5305    do {
5306        ucnv_reset(utf8cnv);
5307        status = U_ZERO_ERROR;
5308        if(target_cap >= buff_size) {
5309            log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5310            break;
5311        }
5312        bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5313            rules, rules_length, &status);
5314        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5315        if(numNeeded!=0 && numNeeded!= bytes_needed){
5316            log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5317            break;
5318        }
5319        numNeeded = bytes_needed;
5320    } while (status == U_BUFFER_OVERFLOW_ERROR);
5321    ucol_close(myCollator);
5322    ucnv_close(utf8cnv);
5323    free(buff);
5324}
5325
5326#endif
5327
5328static void TestJitterbug1293(){
5329    static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5330    char target[256];
5331    UErrorCode status = U_ZERO_ERROR;
5332    UConverter* conv=NULL;
5333    int32_t target_cap, bytes_needed, numNeeded = 0;
5334    conv = ucnv_open("shift-jis",&status);
5335    if(U_FAILURE(status)){
5336      log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5337      return;
5338    }
5339
5340    do{
5341        target_cap =0;
5342        bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5343        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5344        if(numNeeded!=0 && numNeeded!= bytes_needed){
5345          log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5346        }
5347        numNeeded = bytes_needed;
5348    } while (status == U_BUFFER_OVERFLOW_ERROR);
5349    if(U_FAILURE(status)){
5350      log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5351      return;
5352    }
5353    ucnv_close(conv);
5354}
5355static void TestJB5275_1(){
5356
5357    static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5358                                "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5359                                /* Switch script: */
5360                                "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5361                                "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5362                                "\xEF\x40\x3B\xB3\x0A";
5363    static const UChar expected[] ={
5364            0x003b, 0x0a15, 0x000a, /* Easy characters */
5365            0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5366            0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5367            0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5368            0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5369    };
5370
5371    UErrorCode status = U_ZERO_ERROR;
5372    UConverter* conv = ucnv_open("iscii-gur", &status);
5373    UChar dest[100] = {'\0'};
5374    UChar* target = dest;
5375    UChar* targetLimit = dest+100;
5376    const char* source = data;
5377    const char* sourceLimit = data+strlen(data);
5378    const UChar* exp = expected;
5379
5380    if (U_FAILURE(status)) {
5381        log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5382        return;
5383    }
5384
5385    log_verbose("Testing switching back to default script when new line is encountered.\n");
5386    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5387    if(U_FAILURE(status)){
5388        log_err("conversion failed: %s \n", u_errorName(status));
5389    }
5390    targetLimit = target;
5391    target = dest;
5392    printUSeq(target, targetLimit-target);
5393    while(target<targetLimit){
5394        if(*exp!=*target){
5395            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5396        }
5397        target++;
5398        exp++;
5399    }
5400    ucnv_close(conv);
5401}
5402
5403static void TestJB5275(){
5404    static const char* data =
5405    /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5406    /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5407    /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5408        "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5409        "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5410        "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5411        "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5412        "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5413        "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5414        /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5415    static const UChar expected[] ={
5416        0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5417        0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5418        0x0038, 0x0C95, 0x000A, /* Kannada test */
5419        0x0039, 0x0D15, 0x000A, /* Malayalam test */
5420        0x003A, 0x0A95, 0x000A, /* Gujarati test */
5421        0x003B, 0x0A15, 0x000A, /* Punjabi test */
5422    };
5423
5424    UErrorCode status = U_ZERO_ERROR;
5425    UConverter* conv = ucnv_open("iscii", &status);
5426    UChar dest[100] = {'\0'};
5427    UChar* target = dest;
5428    UChar* targetLimit = dest+100;
5429    const char* source = data;
5430    const char* sourceLimit = data+strlen(data);
5431    const UChar* exp = expected;
5432    ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5433    if(U_FAILURE(status)){
5434        log_err("conversion failed: %s \n", u_errorName(status));
5435    }
5436    targetLimit = target;
5437    target = dest;
5438
5439    printUSeq(target, targetLimit-target);
5440
5441    while(target<targetLimit){
5442        if(*exp!=*target){
5443            log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5444        }
5445        target++;
5446        exp++;
5447    }
5448    ucnv_close(conv);
5449}
5450